/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */

#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>

#include <libkern/OSAtomic.h>

#include <sys/pthread_shims.h>
#include "kern_internal.h"
uint32_t pthread_debug_tracing = 0;

SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &pthread_debug_tracing, 0, "")
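/*
 * The PTHREAD_TRACE/PTHREAD_TRACE1 kdebug points used throughout this file are
 * presumably gated on the sysctl declared above, which surfaces to userspace as
 * kern.pthread_debug_tracing.  Illustrative use only:
 *
 *	sysctl kern.pthread_debug_tracing		# read (0 = tracing off)
 *	sysctl -w kern.pthread_debug_tracing=1		# enable (requires root)
 */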
// XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE
#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))

lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;

extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern void workqueue_thread_yielded(void);
static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc,
				       boolean_t overcommit, pthread_priority_t oc_prio);

static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority);

static void wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
		      int reuse_thread, int wake_thread, int return_directly);

static int _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl);

static void wq_unpark_continue(void);
static void wq_unsuspend_continue(void);

static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
static void workqueue_removethread(struct threadlist *tl, int fromexit);
static void workqueue_lock_spin(proc_t);
static void workqueue_unlock(proc_t);

int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
#define WQ_MAXPRI_MIN	0	/* low prio queue num */
#define WQ_MAXPRI_MAX	2	/* max prio queuenum */
#define WQ_PRI_NUM	3	/* number of prio work queues */

#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128

#define TRUNC_DOWN32(a,c)	((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
#define TRUNC_DOWN64(a,c)	((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
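/*
 * Worked example of the truncation macros above (illustrative only): the macro
 * steps down by the alignment and then masks to a multiple of it, so
 *
 *	TRUNC_DOWN32(0x1000, 16) == (0x1000 - 16) & 0xFFFFFFF0 == 0x0FF0
 *
 * i.e. the result is always at least 'c' bytes below 'a' and aligned to a
 * 'c'-byte boundary.
 */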
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
#define PTHREAD_START_CUSTOM		0x01000000
#define PTHREAD_START_SETSCHED		0x02000000
#define PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_QOSCLASS_MASK	0xffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff

#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR
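/*
 * Decoding the flags word (illustrative sketch based on the masks above and on
 * how _bsdthread_create consumes them further down in this file):
 *
 *	policy     = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
 *	importance =  flags & PTHREAD_START_IMPORTANCE_MASK;
 *
 * so, for example, a caller requesting PTHREAD_START_SETSCHED with SCHED_RR and
 * importance 10 would (hypothetically) pass
 *
 *	flags = PTHREAD_START_SETSCHED | (SCHED_RR << PTHREAD_START_POLICY_BITSHIFT) | 10;
 */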
int
_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
{
	mach_vm_offset_t stackaddr;
	mach_vm_size_t th_allocsize = 0;
	mach_vm_size_t user_stacksize;
	mach_vm_size_t th_stacksize;
	mach_vm_size_t th_guardsize;
	mach_vm_offset_t th_stackaddr;
	mach_vm_offset_t th_stack;
	mach_vm_offset_t th_pthread;
	mach_port_name_t th_thport;

	vm_map_t vmap = pthread_kern->current_map();
	task_t ctask = current_task();
	unsigned int policy, importance;

	if (pthread_kern->proc_get_register(p) == 0) {

	PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);

	isLP64 = proc_is64bit(p);
	th_guardsize = vm_map_page_size(vmap);

#if defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif

	kret = pthread_kern->thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
	thread_reference(th);

	sright = (void *)pthread_kern->convert_thread_to_port(th);
	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		th_stacksize = (mach_vm_size_t)user_stack;	/* if it is custom then it is stacksize */
		th_allocsize = th_stacksize + th_guardsize + pthread_kern->proc_get_pthsize(p);

		kret = mach_vm_map(vmap, &stackaddr,
				   VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
				   0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
		if (kret != KERN_SUCCESS)
			kret = mach_vm_allocate(vmap,
						&stackaddr, th_allocsize,
						VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
		if (kret != KERN_SUCCESS) {

		PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

		th_stackaddr = stackaddr;

		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS) {

		th_stack = (stackaddr + th_stacksize + th_guardsize);
		th_pthread = (stackaddr + th_stacksize + th_guardsize);
		user_stacksize = th_stacksize;
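		/*
		 * Sketch of the resulting allocation in the non-custom case above
		 * (low addresses on the left; sizes taken from the code just above):
		 *
		 *   stackaddr                                 th_stack == th_pthread
		 *   | guard page (th_guardsize) | thread stack (th_stacksize) | pthread_t + TSD (proc_get_pthsize) |
		 *
		 * the guard page is protected VM_PROT_NONE, the user stack grows
		 * down from th_stack, and the pthread_t structure sits immediately
		 * above it at that same address.
		 */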
		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_fault(vmap,
			 vm_map_trunc_page_mask(th_stack - PAGE_SIZE_64, vm_map_page_mask(vmap)),
			 VM_PROT_READ | VM_PROT_WRITE,
			 FALSE,
			 THREAD_UNINT, NULL, 0);

		vm_fault(vmap,
			 vm_map_trunc_page_mask(th_pthread, vm_map_page_mask(vmap)),
			 VM_PROT_READ | VM_PROT_WRITE,
			 FALSE,
			 THREAD_UNINT, NULL, 0);

		th_stack = user_stack;
		user_stacksize = user_stack;
		th_pthread = user_pthread;

		PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Set up i386 registers & function call.
	 */
	x86_thread_state32_t state;
	x86_thread_state32_t *ts = &state;

	ts->eip = (unsigned int)pthread_kern->proc_get_threadstart(p);
	ts->eax = (unsigned int)th_pthread;
	ts->ebx = (unsigned int)th_thport;
	ts->ecx = (unsigned int)user_func;
	ts->edx = (unsigned int)user_funcarg;
	ts->edi = (unsigned int)user_stacksize;
	ts->esi = (unsigned int)flags;

	ts->esp = (int)((vm_offset_t)(th_stack - C_32_STK_ALIGN));

	error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
	if (error != KERN_SUCCESS) {

	x86_thread_state64_t state64;
	x86_thread_state64_t *ts64 = &state64;

	ts64->rip = (uint64_t)pthread_kern->proc_get_threadstart(p);
	ts64->rdi = (uint64_t)th_pthread;
	ts64->rsi = (uint64_t)(th_thport);
	ts64->rdx = (uint64_t)user_func;
	ts64->rcx = (uint64_t)user_funcarg;
	ts64->r8 = (uint64_t)user_stacksize;
	ts64->r9 = (uint64_t)flags;

	/*
	 * set stack pointer aligned to 16 byte boundary
	 */
	ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);

	error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
	if (error != KERN_SUCCESS) {

#elif defined(__arm__)
	arm_thread_state_t state;
	arm_thread_state_t *ts = &state;

	ts->pc = (int)pthread_kern->proc_get_threadstart(p);
	ts->r[0] = (unsigned int)th_pthread;
	ts->r[1] = (unsigned int)th_thport;
	ts->r[2] = (unsigned int)user_func;
	ts->r[3] = (unsigned int)user_funcarg;
	ts->r[4] = (unsigned int)user_stacksize;
	ts->r[5] = (unsigned int)flags;

	/* Set r7 & lr to 0 for better back tracing */

	ts->sp = (int)((vm_offset_t)(th_stack - C_32_STK_ALIGN));

	(void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);

#else
#error bsdthread_create not defined for this architecture
#endif
	if ((flags & PTHREAD_START_SETSCHED) != 0) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
			extinfo.timeshare = 0;

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

#define BASEPRI_DEFAULT 31
		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
		/* Set thread QoS class if requested. */
		pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);

		thread_qos_policy_data_t qos;
		qos.qos_tier = pthread_priority_get_qos_class(priority);
		qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
			_pthread_priority_get_relpri(priority);

		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);

	kret = pthread_kern->thread_resume(th);
	if (kret != KERN_SUCCESS) {

	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);

	*retval = th_pthread;

	if (allocated != 0) {
		(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);

	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
int
_bsdthread_terminate(__unused struct proc *p,
		     user_addr_t stackaddr,
		     __unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;

	freeaddr = (mach_vm_offset_t)stackaddr;

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);

	(void) thread_terminate(current_thread());

	if (sem != MACH_PORT_NULL) {
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	panic("bsdthread_terminate: still running\n");

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
_bsdthread_register(struct proc *p,
		    user_addr_t threadstart,
		    user_addr_t wqthread,
		    user_addr_t pthread_init_data,
		    user_addr_t targetconc_ptr,
		    uint64_t dispatchqueue_offset,

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);

	/* if we have pthread_init_data, then we use that and target_concptr (which is an offset) to get the data. */
	if (pthread_init_data != 0) {
		thread_qos_policy_data_t qos;

		struct _pthread_registration_data data;
		size_t pthread_init_sz = MIN(sizeof(struct _pthread_registration_data), (size_t)targetconc_ptr);

		kern_return_t kr = copyin(pthread_init_data, &data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {

		/* Incoming data from the data structure */
		pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

		/* Outgoing data that userspace expects as a reply */
		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/* An unspecified QoS means the kernel wants us to impose legacy QoS upon the thread. */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);

			if (kr == KERN_SUCCESS) {
				data.main_qos = pthread_qos_class_get_priority(qos.qos_tier);

				data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);

			data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {

		pthread_kern->proc_set_dispatchqueue_offset(p, dispatchqueue_offset);
		pthread_kern->proc_set_targconc(p, targetconc_ptr);

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;
int
_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
{
	pthread_priority_t priority;

	/* Unused parameters must be zero. */

	/* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
	if (proc_is64bit(p)) {
		kr = copyin(tsd_priority_addr, &v, sizeof(v));
		if (kr != KERN_SUCCESS) {
		priority = (int)(v & 0xffffffff);
		kr = copyin(tsd_priority_addr, &v, sizeof(v));
		if (kr != KERN_SUCCESS) {

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

	/* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
	if (th != current_thread()) {
		thread_deallocate(th);

	int rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);

	/* Static param the thread; we just set QoS on it, so it's stuck in QoS land now. */
	/* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details

	thread_deallocate(th);
static inline struct threadlist *
util_get_thread_threadlist_entry(thread_t th)
{
	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
static void
wq_thread_override_reset(thread_t th)
{
	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);

	/*
	 * Drop all outstanding overrides on this thread, done outside the wq lock
	 * because proc_usynch_thread_qos_remove_override takes a spinlock that
	 * could cause us to panic.
	 */
	uint32_t count = tl->th_dispatch_override_count;
	while (!OSCompareAndSwap(count, 0, &tl->th_dispatch_override_count)) {
		count = tl->th_dispatch_override_count;

	PTHREAD_TRACE(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, count, 0, 0, 0);

	for (int i = count; i > 0; i--) {
		pthread_kern->proc_usynch_thread_qos_remove_override(uth, 0);
int
_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
{
	thread_qos_policy_data_t qos;
	mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
	boolean_t gd = FALSE;

	int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;

	if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
		kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
		if (kr != KERN_SUCCESS) {

		/* If we have main-thread QoS then we don't allow a thread to come out of QOS_CLASS_UNSPECIFIED. */
		if (pthread_kern->qos_main_thread_active() && qos.qos_tier == THREAD_QOS_UNSPECIFIED) {

		/* Get the work queue for tracing, also the threadlist for bucket manipulation. */
		struct workqueue *wq = NULL;
		struct threadlist *tl = util_get_thread_threadlist_entry(current_thread());

		PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);

		qos.qos_tier = pthread_priority_get_qos_class(priority);
		qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);

		kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
		if (kr != KERN_SUCCESS) {

		/* If we're a workqueue thread, the threadlist item priority needs adjusting, along with the bucket we were running in. */
			workqueue_lock_spin(p);

			/* Fix up counters. */
			uint8_t old_bucket = tl->th_priority;
			uint8_t new_bucket = pthread_priority_get_class_index(priority);

			uint32_t old_active = OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
			OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);

			wq->wq_thscheduled_count[old_bucket]--;
			wq->wq_thscheduled_count[new_bucket]++;

			tl->th_priority = new_bucket;

			/* If we were at the ceiling of non-overcommitted threads for a given bucket, we have to
			 * reevaluate whether we should start more work.
			 */
			if (old_active == wq->wq_reqconc[old_bucket]) {
				/* workqueue_run_nextreq will drop the workqueue lock in all exit paths. */
				(void)workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);

		PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);

	if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
		kr = pthread_kern->thread_set_voucher_name(voucher);
		if (kr != KERN_SUCCESS) {

	if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
		thread_extended_policy_data_t extpol;
		thread_t thread = current_thread();

		extpol.timeshare = 0;

		struct threadlist *tl = util_get_thread_threadlist_entry(thread);

			/* Not allowed on workqueue threads, since there is no symmetric clear function */
			fixedpri_rv = ENOTSUP;

		kr = pthread_kern->thread_policy_set_internal(thread, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
		if (kr != KERN_SUCCESS) {
			fixedpri_rv = EINVAL;

	if (qos_rv && voucher_rv) {
		/* Both failed, give that a unique error. */
int
_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int __unused *retval)
{
	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	int override_qos = pthread_priority_get_qos_class(priority);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);

		/* Workqueue threads count their overrides, so they can forcibly balance any outstanding
		 * overrides when they return to the kernel.
		 */
		uint32_t o = OSAddAtomic(1, &tl->th_override_count);
		PTHREAD_TRACE(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o+1, priority, 0);

	/* The only failure case is when we pass a tid and it has to look up the thread; since we pass the uthread, this always succeeds. */
	pthread_kern->proc_usynch_thread_qos_add_override(uth, 0, override_qos, TRUE);

	thread_deallocate(th);

int
_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t arg2, user_addr_t arg3, int __unused *retval)
{
	if (arg2 != 0 || arg3 != 0) {

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);

		uint32_t o = OSAddAtomic(-1, &tl->th_override_count);

		PTHREAD_TRACE(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o-1, 0, 0);

			thread_deallocate(th);

	pthread_kern->proc_usynch_thread_qos_remove_override(uth, 0);

	thread_deallocate(th);

int
_bsdthread_ctl_qos_override_dispatch(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int __unused *retval)
{
	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	int override_qos = pthread_priority_get_qos_class(priority);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);
		thread_deallocate(th);

	/* Workqueue threads count their overrides, so they can forcibly balance any outstanding
	 * overrides when they return to the kernel.
	 */
	uint32_t o = OSAddAtomic(1, &tl->th_dispatch_override_count);
	PTHREAD_TRACE(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o+1, priority, 0);

	/* The only failure case is when we pass a tid and it has to look up the thread; since we pass the uthread, this always succeeds. */
	pthread_kern->proc_usynch_thread_qos_add_override(uth, 0, override_qos, TRUE);

	thread_deallocate(th);

int
_bsdthread_ctl_qos_override_reset(struct proc __unused *p, user_addr_t __unused cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int __unused *retval)
{
	struct threadlist *tl;

	if (arg1 != 0 || arg2 != 0 || arg3 != 0) {

	th = current_thread();
	tl = util_get_thread_threadlist_entry(th);

		wq_thread_override_reset(th);
int
_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
{
	switch (cmd) {
	case BSDTHREAD_CTL_SET_QOS:
		return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_START:
		return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_END:
		return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
		return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
		return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_SET_SELF:
		return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
uint32_t wq_yielded_threshold		= WQ_YIELDED_THRESHOLD;
uint32_t wq_yielded_window_usecs	= WQ_YIELDED_WINDOW_USECS;
uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
uint32_t wq_max_threads			= WORKQUEUE_MAXTHREADS;
uint32_t wq_max_constrained_threads	= WORKQUEUE_MAXTHREADS / 8;

SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_yielded_threshold, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_yielded_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_timer_interval_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_threads, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_constrained_threads, 0, "");
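/*
 * These tunables surface to userspace under the kern.* namespace via the
 * SYSCTL_INT declarations above; illustrative use only (values made up):
 *
 *	sysctl kern.wq_max_constrained_threads		# inspect the current limit
 *	sysctl -w kern.wq_stalled_window_usecs=300	# tune, as root
 *
 * They are read at runtime by the workqueue code below, e.g. the stall window
 * feeds wq_thread_is_busy() and the add-timer interval in
 * workqueue_interval_timer_start().
 */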
static uint32_t wq_init_constrained_limit = 1;
void
_workqueue_init_lock(proc_t p)
{
	lck_spin_init(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp, pthread_lck_attr);
	*(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;
}

void
_workqueue_destroy_lock(proc_t p)
{
	lck_spin_destroy(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp);
}

static void
workqueue_lock_spin(proc_t p)
{
	lck_spin_lock(pthread_kern->proc_get_wqlockptr(p));
}

static void
workqueue_unlock(proc_t p)
{
	lck_spin_unlock(pthread_kern->proc_get_wqlockptr(p));
}

static void
workqueue_interval_timer_start(struct workqueue *wq)
{
	if (wq->wq_timer_interval == 0) {
		wq->wq_timer_interval = wq_stalled_window_usecs;
		wq->wq_timer_interval = wq->wq_timer_interval * 2;

		if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
			wq->wq_timer_interval = wq_max_timer_interval_usecs;

	clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);

	thread_call_enter_delayed(wq->wq_atimer_call, deadline);

	PTHREAD_TRACE(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
static boolean_t
wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
{
	uint64_t lastblocked_ts;

	/*
	 * the timestamp is updated atomically w/o holding the workqueue lock
	 * so we need to do an atomic read of the 64 bits so that we don't see
	 * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
	 * independent fashion by using OSCompareAndSwap64 to write back the
	 * value we grabbed... if it succeeds, then we have a good timestamp to
	 * evaluate... if it fails, we straddled grabbing the timestamp while it
	 * was being updated... treat a failed update as a busy thread since
	 * it implies we are about to see a really fresh timestamp anyway
	 */
	lastblocked_ts = *lastblocked_tsp;

	if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))

	if (lastblocked_ts >= cur_ts) {
		/*
		 * because the update of the timestamp when a thread blocks isn't
		 * serialized against us looking at it (i.e. we don't hold the workq lock)
		 * it's possible to have a timestamp that matches the current time or
		 * that even looks to be in the future relative to when we grabbed the current
		 * time... just treat this as a busy thread since it must have just blocked.
		 */

	elapsed = cur_ts - lastblocked_ts;

	pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);

	if (secs == 0 && usecs < wq_stalled_window_usecs)
#define WQ_TIMER_NEEDED(wq, start_timer) do {						\
	int oldflags = wq->wq_flags;							\
											\
	if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) {				\
		if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
			start_timer = TRUE;						\
	}										\
} while (0)
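/*
 * Usage pattern for WQ_TIMER_NEEDED, as it appears in workqueue_add_timer and
 * workqueue_callback below: the caller seeds a local start_timer = FALSE,
 * invokes the macro, and only arms the delayed thread call if it actually won
 * the OSCompareAndSwap on wq_flags, e.g.
 *
 *	boolean_t start_timer = FALSE;
 *	...
 *	WQ_TIMER_NEEDED(wq, start_timer);
 *	...
 *	if (start_timer == TRUE)
 *		workqueue_interval_timer_start(wq);
 *
 * so at most one pending timer exists per workqueue regardless of how many
 * threads notice the condition concurrently.
 */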
static void
workqueue_add_timer(struct workqueue *wq, __unused int param1)
{
	boolean_t start_timer = FALSE;
	boolean_t add_thread;

	PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_lock_spin(p);

	/*
	 * because workqueue_callback now runs w/o taking the workqueue lock
	 * we are unsynchronized w/r to a change in state of the running threads...
	 * to make sure we always evaluate that change, we allow it to start up
	 * a new timer if the current one is actively evaluating the state
	 * however, we do not need more than 2 timers fired up (1 active and 1 pending)
	 * and we certainly do not want 2 active timers evaluating the state
	 * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
	 * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
	 * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
	 * and set atomically since the callback function needs to manipulate it
	 * w/o holding the workq lock...
	 *
	 * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY   ==   no pending timer, no active timer
	 * !WQ_ATIMER_RUNNING &&  WQL_ATIMER_BUSY   ==   no pending timer, 1 active timer
	 *  WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY   ==   1 pending timer, no active timer
	 *  WQ_ATIMER_RUNNING &&  WQL_ATIMER_BUSY   ==   1 pending timer, 1 active timer
	 */
	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
		wq->wq_lflags |= WQL_ATIMER_WAITING;

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(p);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(p);

	wq->wq_lflags |= WQL_ATIMER_BUSY;

	/*
	 * the workq lock will protect us from seeing WQ_EXITING change state, but we
	 * still need to update this atomically in case someone else tries to start
	 * the timer just as we're releasing it
	 */
	while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));

	if ( !(wq->wq_flags & WQ_EXITING)) {
		/*
		 * check to see if the stall frequency was beyond our tolerance
		 * or we have work on the queue, but haven't scheduled any
		 * new work within our acceptable time interval because
		 * there were no idle threads left to schedule
		 */
		if (wq->wq_reqcount) {
			uint32_t thactive_count;

			for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
				if (wq->wq_requests[priclass])

			assert(priclass < WORKQUEUE_NUM_BUCKETS);

			curtime = mach_absolute_time();

			/*
			 * check for conditions under which we would not add a thread, either
			 *   a) we've got as many running threads as we want in this priority
			 *      band and the priority bands above it
			 *
			 *   b) check to see if the priority group has blocked threads, if the
			 *      last blocked timestamp is old enough, we will have already passed
			 *      (a) where we would have stopped if we had enough active threads.
			 */
			for (i = 0; i <= priclass; i++) {

				thactive_count += wq->wq_thactive_count[i];

				if (wq->wq_thscheduled_count[i]) {
					if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i]))

			if (thactive_count + busycount < wq->wq_max_concurrency) {

				if (wq->wq_thidlecount == 0) {
					/*
					 * if we have no idle threads, try to add one
					 */
					retval = workqueue_addnewthread(wq, FALSE);

			if (wq->wq_reqcount) {
				/*
				 * as long as we have threads to schedule, and we successfully
				 * scheduled new work, keep trying
				 */
				while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
					/*
					 * workqueue_run_nextreq is responsible for
					 * dropping the workqueue lock in all cases
					 */
					retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);
					workqueue_lock_spin(p);

					if (retval == FALSE)

				if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {

					if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)

					if (wq->wq_thidlecount == 0 || busycount)
						WQ_TIMER_NEEDED(wq, start_timer);

					PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);

	if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
		wq->wq_timer_interval = 0;

	wq->wq_lflags &= ~WQL_ATIMER_BUSY;

	if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
		/*
		 * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer
		 * to finish getting out of the way
		 */
		wq->wq_lflags &= ~WQL_ATIMER_WAITING;

	PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_unlock(p);

	if (start_timer == TRUE)
		workqueue_interval_timer_start(wq);
void
_workqueue_thread_yielded(void)
{
	struct workqueue *wq;

	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL || wq->wq_reqcount == 0)

	workqueue_lock_spin(p);

	if (wq->wq_reqcount) {

		if (wq->wq_thread_yielded_count++ == 0)
			wq->wq_thread_yielded_timestamp = mach_absolute_time();

		if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
			workqueue_unlock(p);

		PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);

		wq->wq_thread_yielded_count = 0;

		curtime = mach_absolute_time();
		elapsed = curtime - wq->wq_thread_yielded_timestamp;
		pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);

		if (secs == 0 && usecs < wq_yielded_window_usecs) {

			if (wq->wq_thidlecount == 0) {
				workqueue_addnewthread(wq, TRUE);
				/*
				 * 'workqueue_addnewthread' drops the workqueue lock
				 * when creating the new thread and then retakes it before
				 * returning... this window allows other threads to process
				 * requests, so we need to recheck for available work
				 * if none found, we just return... the newly created thread
				 * will eventually get used (if it hasn't already)...
				 */
				if (wq->wq_reqcount == 0) {
					workqueue_unlock(p);

			if (wq->wq_thidlecount) {
				boolean_t overcommit = FALSE;
				boolean_t force_oc = FALSE;

				for (priority = 0; priority < WORKQUEUE_NUM_BUCKETS; priority++) {
					if (wq->wq_requests[priority]) {

				assert(priority < WORKQUEUE_NUM_BUCKETS);

				wq->wq_requests[priority]--;

				if (wq->wq_ocrequests[priority]) {
					wq->wq_ocrequests[priority]--;

				(void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, pthread_priority_from_class_index(priority));
				/*
				 * workqueue_run_nextreq is responsible for
				 * dropping the workqueue lock in all cases
				 */
				PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);

		PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);

	workqueue_unlock(p);
static void
workqueue_callback(int type, thread_t thread)
{
	struct uthread *uth;
	struct threadlist *tl;
	struct workqueue *wq;

	uth = pthread_kern->get_bsdthread_info(thread);
	tl = pthread_kern->uthread_get_threadlist(uth);

	switch (type) {
	case SCHED_CALL_BLOCK: {
		uint32_t old_activecount;
		boolean_t start_timer = FALSE;

		old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);

		if (old_activecount == wq->wq_reqconc[tl->th_priority]) {
			UInt64 *lastblocked_ptr;

			/*
			 * the number of active threads at this priority
			 * has fallen below the maximum number of concurrent
			 * threads that we're allowed to run
			 */
			lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority];
			curtime = mach_absolute_time();

			/*
			 * if we collide with another thread trying to update the last_blocked (really unlikely
			 * since another thread would have to get scheduled and then block after we start down
			 * this path), it's not a problem.  Either timestamp is adequate, so no need to retry
			 */
			OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);

			if (wq->wq_reqcount) {
				/*
				 * we have work to do so start up the timer
				 * if it's not running... we'll let it sort
				 * out whether we really need to start up
				 */
				WQ_TIMER_NEEDED(wq, start_timer);

		if (start_timer == TRUE) {
			workqueue_interval_timer_start(wq);

		PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_START, wq, old_activecount, tl->th_priority, start_timer, thread_tid(thread));

	case SCHED_CALL_UNBLOCK:
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 */
		OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority]);

		PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, 0, thread_tid(thread));

sched_call_t
_workqueue_get_sched_callback(void)
{
	return workqueue_callback;
static void
workqueue_removethread(struct threadlist *tl, int fromexit)
{
	struct workqueue *wq;
	struct uthread * uth;

	/*
	 * If fromexit is set, the call is from workqueue_exit(),
	 * so some cleanups are to be avoided.
	 */
	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);

	if (fromexit == 0) {
		wq->wq_thidlecount--;

	/*
	 * Clear the threadlist pointer in uthread so
	 * blocked thread on wakeup for termination will
	 * not access the thread list as it is going to be
	 */
	pthread_kern->thread_sched_call(tl->th_thread, NULL);

	uth = pthread_kern->get_bsdthread_info(tl->th_thread);
	if (uth != (struct uthread *)0) {
		pthread_kern->uthread_set_threadlist(uth, NULL);

	if (fromexit == 0) {
		/* during exit the lock is not held */
		workqueue_unlock(wq->wq_proc);

	if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
		/*
		 * thread was created, but never used...
		 * need to clean up the stack and port ourselves
		 * since we're not going to spin up through the
		 * normal exit path triggered from Libc
		 */
		if (fromexit == 0) {
			/* vm map is already deallocated when this is called from exit */
			(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);

		(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);

		PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));

		PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));

	/*
	 * drop our ref on the thread
	 */
	thread_deallocate(tl->th_thread);

	kfree(tl, sizeof(struct threadlist));
/*
 * called with workq lock held
 * dropped and retaken around thread creation
 * return with workq lock held
 */
static boolean_t
workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)
{
	struct threadlist *tl;
	struct uthread *uth;

	mach_vm_offset_t stackaddr;
	mach_vm_size_t guardsize;

	if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING)

	if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (pthread_kern->config_thread_max - 20)) {
		wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		/*
		 * if we're not creating this thread to service an overcommit request,
		 * then check the size of the constrained thread pool... if we've already
		 * reached our max for threads scheduled from this pool, don't create a new
		 * one... the callers of this function are prepared for failure.
		 */
		wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;

	workqueue_unlock(p);

	kret = pthread_kern->thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);
	if (kret != KERN_SUCCESS) {

	tl = kalloc(sizeof(struct threadlist));
	bzero(tl, sizeof(struct threadlist));

#if defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif

	guardsize = vm_map_page_size(wq->wq_map);
	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + guardsize + pthread_kern->proc_get_pthsize(p);

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			   VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
			   0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			   VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
					&stackaddr, tl->th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);

	if (kret == KERN_SUCCESS) {
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS)
			(void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);

	if (kret != KERN_SUCCESS) {
		(void) thread_terminate(th);
		thread_deallocate(th);

		kfree(tl, sizeof(struct threadlist));

	thread_reference(th);

	sright = (void *)pthread_kern->convert_thread_to_port(th);
	tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(wq->wq_task));

	pthread_kern->thread_static_param(th, TRUE);

	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;

	tl->th_stackaddr = stackaddr;
	tl->th_priority = WORKQUEUE_NUM_BUCKETS;

	uth = pthread_kern->get_bsdthread_info(tl->th_thread);

	workqueue_lock_spin(p);

	pthread_kern->uthread_set_threadlist(uth, tl);
	TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);

	wq->wq_thidlecount++;

	PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread));

	workqueue_lock_spin(p);
int
_workq_open(struct proc *p, __unused int32_t *retval)
{
	struct workqueue * wq;
	boolean_t need_wakeup = FALSE;

	if (pthread_kern->proc_get_register(p) == 0) {

	num_cpus = pthread_kern->ml_get_max_cpus();

	if (wq_init_constrained_limit) {
		/*
		 * set up the limit for the constrained pool
		 * this is a virtual pool in that we don't
		 * maintain it on a separate idle and run list
		 */
		limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;

		if (limit > wq_max_constrained_threads)
			wq_max_constrained_threads = limit;

		wq_init_constrained_limit = 0;

	workqueue_lock_spin(p);

	if (pthread_kern->proc_get_wqptr(p) == NULL) {

		while (*pthread_kern->proc_get_wqinitingptr(p) == TRUE) {

			assert_wait((caddr_t)pthread_kern->proc_get_wqinitingptr(p), THREAD_UNINT);
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);

		if (pthread_kern->proc_get_wqptr(p) != NULL) {

		*(pthread_kern->proc_get_wqinitingptr(p)) = TRUE;

		workqueue_unlock(p);

		wq_size = sizeof(struct workqueue);

		ptr = (char *)kalloc(wq_size);
		bzero(ptr, wq_size);

		wq = (struct workqueue *)ptr;
		wq->wq_flags = WQ_LIST_INITED;

		wq->wq_max_concurrency = num_cpus;
		wq->wq_task = current_task();
		wq->wq_map  = pthread_kern->current_map();

		for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++)
			wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency;

		TAILQ_INIT(&wq->wq_thrunlist);
		TAILQ_INIT(&wq->wq_thidlelist);

		wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);

		workqueue_lock_spin(p);

		pthread_kern->proc_set_wqptr(p, wq);
		pthread_kern->proc_set_wqsize(p, wq_size);

		*(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;

	workqueue_unlock(p);

	if (need_wakeup == TRUE) {
		wakeup(pthread_kern->proc_get_wqinitingptr(p));
int
_workq_kernreturn(struct proc *p,
		  __unused user_addr_t item,
		  __unused int32_t *retval)
{
	struct workqueue *wq;

	if (pthread_kern->proc_get_register(p) == 0) {

	case WQOPS_QUEUE_NEWSPISUPP: {
		/*
		 * arg2 = offset of serialno into dispatch queue
		 */
		pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);

	case WQOPS_QUEUE_REQTHREADS: {
		/*
		 * arg2 = number of threads to start
		 */
		boolean_t overcommit = FALSE;
		int reqcount = arg2;
		pthread_priority_t priority = arg3;

		overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0;
		class = pthread_priority_get_class_index(priority);

		if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS)) {

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
			workqueue_unlock(p);

		wq->wq_reqcount += reqcount;
		wq->wq_requests[class] += reqcount;

		PTHREAD_TRACE(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);

		while (wq->wq_reqcount) {
			if (!workqueue_run_one(p, wq, overcommit, priority))

		PTHREAD_TRACE(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);

			if (!workqueue_run_one(p, wq, overcommit, priority))

		/*
		 * we need to delay starting some of the overcommit requests...
		 * we should only fail to create the overcommit threads if
		 * we're at the max thread limit... as existing threads
		 * return to the kernel, we'll notice the ocrequests
		 * and spin them back to user space as the overcommit variety
		 */
		wq->wq_reqcount += reqcount;
		wq->wq_requests[class] += reqcount;
		wq->wq_ocrequests[class] += reqcount;

		PTHREAD_TRACE(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);

		workqueue_unlock(p);

	case WQOPS_THREAD_RETURN: {
		thread_t th = current_thread();
		struct uthread *uth = pthread_kern->get_bsdthread_info(th);
		struct threadlist *tl = util_get_thread_threadlist_entry(th);

		/* reset signal mask on the workqueue thread to default state */
		if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
			pthread_kern->proc_lock(p);
			pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
			pthread_kern->proc_unlock(p);

		/* dropping WQ override counts has to be done outside the wq lock. */
		wq_thread_override_reset(th);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL || !tl) {
			workqueue_unlock(p);

		PTHREAD_TRACE(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0);

		(void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0);
		/*
		 * workqueue_run_nextreq is responsible for
		 * dropping the workqueue lock in all cases
		 */
/*
 * Routine:	workqueue_mark_exiting
 *
 * Function:	Mark the work queue such that new threads will not be added to the
 *		work queue after we return.
 *
 * Conditions:	Called against the current process.
 */
void
_workqueue_mark_exiting(struct proc *p)
{
	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);

		PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);

		workqueue_lock_spin(p);

		/*
		 * we now arm the timer in the callback function w/o holding the workq lock...
		 * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
		 * ensure only a single timer is running and to notice that WQ_EXITING has
		 * been set (we don't want to start a timer once WQ_EXITING is posted)
		 *
		 * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
		 * therefore no need to clear the timer state atomically from the flags
		 *
		 * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING
		 * the check for and sleep until clear is protected
		 */
		while (!(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));

		if (wq->wq_flags & WQ_ATIMER_RUNNING) {
			if (thread_call_cancel(wq->wq_atimer_call) == TRUE) {
				wq->wq_flags &= ~WQ_ATIMER_RUNNING;

		while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {
			assert_wait((caddr_t)wq, (THREAD_UNINT));
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);

		workqueue_unlock(p);

	PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
/*
 * Routine:	workqueue_exit
 *
 * Function:	clean up the work queue structure(s) now that there are no threads
 *		left running inside the work queue (except possibly current_thread).
 *
 * Conditions:	Called by the last thread in the process.
 *		Called against current process.
 */
void
_workqueue_exit(struct proc *p)
{
	struct workqueue * wq;
	struct threadlist * tl, *tlist;
	struct uthread *uth;

	wq = pthread_kern->proc_get_wqptr(p);

		PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);

		wq_size = pthread_kern->proc_get_wqsize(p);
		pthread_kern->proc_set_wqptr(p, NULL);
		pthread_kern->proc_set_wqsize(p, 0);

		/*
		 * Clean up workqueue data structures for threads that exited and
		 * didn't get a chance to clean up after themselves.
		 */
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
			pthread_kern->thread_sched_call(tl->th_thread, NULL);

			uth = pthread_kern->get_bsdthread_info(tl->th_thread);
			if (uth != (struct uthread *)0) {
				pthread_kern->uthread_set_threadlist(uth, NULL);

			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

			/*
			 * drop our last ref on the thread
			 */
			thread_deallocate(tl->th_thread);

			kfree(tl, sizeof(struct threadlist));

		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
			workqueue_removethread(tl, 1);

		thread_call_free(wq->wq_atimer_call);

	PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
static boolean_t
workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority)
{
	if (wq->wq_thidlecount == 0) {
		if (overcommit == FALSE) {
			if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency)
				workqueue_addnewthread(wq, overcommit);
			workqueue_addnewthread(wq, overcommit);

		if (wq->wq_thidlecount == 0)

	ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority);
	/*
	 * workqueue_run_nextreq is responsible for
	 * dropping the workqueue lock in all cases
	 */
	workqueue_lock_spin(p);
1899 * workqueue_run_nextreq:
1900 * called with the workqueue lock held...
1901 * responsible for dropping it in all cases
1904 workqueue_run_nextreq(proc_t p
, struct workqueue
*wq
, thread_t thread
,
1905 boolean_t force_oc
, boolean_t overcommit
, pthread_priority_t oc_prio
)
1907 thread_t th_to_run
= THREAD_NULL
;
1908 thread_t th_to_park
= THREAD_NULL
;
1909 int wake_thread
= 0;
1910 int reuse_thread
= WQ_FLAG_THREAD_REUSE
;
1911 uint32_t priclass
, orig_class
;
1912 uint32_t us_to_wait
;
1913 struct threadlist
*tl
= NULL
;
1914 struct uthread
*uth
= NULL
;
1915 boolean_t start_timer
= FALSE
;
1916 boolean_t adjust_counters
= TRUE
;
1918 uint32_t thactive_count
;
1921 PTHREAD_TRACE(TRACE_wq_run_nextitem
|DBG_FUNC_START
, wq
, thread
, wq
->wq_thidlecount
, wq
->wq_reqcount
, 0);
1923 if (thread
!= THREAD_NULL
) {
1924 uth
= pthread_kern
->get_bsdthread_info(thread
);
1926 if ((tl
= pthread_kern
->uthread_get_threadlist(uth
)) == NULL
) {
1927 panic("wq thread with no threadlist");
1932 * from here until we drop the workq lock
1933 * we can't be pre-empted since we hold
1934 * the lock in spin mode... this is important
1935 * since we have to independently update the priority that
1936 * the thread is associated with and the priorty based
1937 * counters that "workqueue_callback" also changes and bases
1940 dispatch_overcommit
:
1942 if (overcommit
|| force_oc
) {
1943 priclass
= pthread_priority_get_class_index(oc_prio
);
1945 if (thread
!= THREAD_NULL
) {
1949 goto grab_idle_thread
;
1951 if (wq
->wq_reqcount
) {
1952 for (priclass
= 0; priclass
< WORKQUEUE_NUM_BUCKETS
; priclass
++) {
1953 if (wq
->wq_requests
[priclass
])
1956 assert(priclass
< WORKQUEUE_NUM_BUCKETS
);
1958 if (wq
->wq_ocrequests
[priclass
] && (thread
!= THREAD_NULL
|| wq
->wq_thidlecount
)) {
1960 * handle delayed overcommit request...
1961 * they have priority over normal requests
1962 * within a given priority level
1965 wq
->wq_requests
[priclass
]--;
1966 wq
->wq_ocrequests
[priclass
]--;
1968 oc_prio
= pthread_priority_from_class_index(priclass
);
1971 goto dispatch_overcommit
;
1975 * if we get here, the work should be handled by a constrained thread
1977 if (wq
->wq_reqcount
== 0 || wq
->wq_constrained_threads_scheduled
>= wq_max_constrained_threads
) {
1979 * no work to do, or we're already at or over the scheduling limit for
1980 * constrained threads... just return or park the thread...
1981 * do not start the timer for this condition... if we don't have any work,
1982 * we'll check again when new work arrives... if we're over the limit, we need 1 or more
1983 * constrained threads to return to the kernel before we can dispatch additional work
1985 if ((th_to_park
= thread
) == THREAD_NULL
)
1993 curtime
= mach_absolute_time();
1995 thactive_count
+= wq
->wq_thactive_count
[priclass
];
1997 if (wq
->wq_thscheduled_count
[priclass
]) {
1998 if (wq_thread_is_busy(curtime
, &wq
->wq_lastblocked_ts
[priclass
])) {
2003 if (thread
!= THREAD_NULL
) {
2004 if (tl
->th_priority
== priclass
) {
2006 * dont't count this thread as currently active
2011 if (thactive_count
+ busycount
>= wq
->wq_max_concurrency
) {
2014 * we found at least 1 thread in the
2015 * 'busy' state... make sure we start
2016 * the timer because if they are the only
2017 * threads keeping us from scheduling
2018 * this work request, we won't get a callback
2019 * to kick off the timer... we need to
2022 WQ_TIMER_NEEDED(wq
, start_timer
);
2025 PTHREAD_TRACE(TRACE_wq_overcommitted
|DBG_FUNC_NONE
, wq
, (start_timer
? 1<<7 : 0) | pthread_priority_from_class_index(priclass
), thactive_count
, busycount
, 0);
2027 if ((th_to_park
= thread
) == THREAD_NULL
) {
	if (thread != THREAD_NULL) {
		/*
		 * thread is non-NULL here when we return from userspace
		 * in workq_kernreturn, rather than trying to find a thread
		 * we pick up new work for this specific thread.
		 */
		th_to_run = thread;
		goto pick_up_work;
	}

grab_idle_thread:
	if (wq->wq_thidlecount == 0) {
		/*
		 * we have no additional threads waiting to pick up
		 * work, however, there is additional work to do.
		 */
		WQ_TIMER_NEEDED(wq, start_timer);

		PTHREAD_TRACE(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0);

		goto no_thread_to_run;
	}

	/*
	 * we already know there is both work available
	 * and an idle thread, so activate a thread and then
	 * fall into the code that pulls a new work request...
	 */
	tl = TAILQ_FIRST(&wq->wq_thidlelist);
	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
	wq->wq_thidlecount--;

	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
		tl->th_flags &= ~TH_LIST_SUSPENDED;
		reuse_thread = 0;

	} else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
		tl->th_flags &= ~TH_LIST_BLOCKED;
		wake_thread = 1;
	}
	tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;

	wq->wq_threads_scheduled++;
	wq->wq_thscheduled_count[priclass]++;
	OSAddAtomic(1, &wq->wq_thactive_count[priclass]);

	adjust_counters = FALSE;
	th_to_run = tl->th_thread;
pick_up_work:
	if (!overcommit && !force_oc) {
		wq->wq_reqcount--;
		wq->wq_requests[priclass]--;

		if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
			wq->wq_constrained_threads_scheduled++;
			tl->th_flags |= TH_LIST_CONSTRAINED;
		}
	} else {
		if (tl->th_flags & TH_LIST_CONSTRAINED) {
			wq->wq_constrained_threads_scheduled--;
			tl->th_flags &= ~TH_LIST_CONSTRAINED;
		}
	}

	orig_class = tl->th_priority;
	tl->th_priority = (uint8_t)priclass;

	if (adjust_counters && (orig_class != priclass)) {
		/*
		 * we need to adjust these counters based on this
		 * thread's new disposition w/r to priority
		 */
		OSAddAtomic(-1, &wq->wq_thactive_count[orig_class]);
		OSAddAtomic(1, &wq->wq_thactive_count[priclass]);

		wq->wq_thscheduled_count[orig_class]--;
		wq->wq_thscheduled_count[priclass]++;
	}
	wq->wq_thread_yielded_count = 0;
	workqueue_unlock(p);

	if (orig_class != priclass) {
		pthread_priority_t pri = pthread_priority_from_class_index(priclass);

		thread_qos_policy_data_t qosinfo;

		/* Set the QoS tier on the thread, along with the ceiling of max importance for this class. */
		qosinfo.qos_tier = pthread_priority_get_qos_class(pri);
		qosinfo.tier_importance = 0;

		PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(orig_class), 0, 0);

		/* All the previous implementation here now boils down to setting the QoS policy on the thread. */
		pthread_kern->thread_policy_set_internal(th_to_run, THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, THREAD_QOS_POLICY_COUNT);

		PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(priclass), qosinfo.qos_tier, 0);
	}

	/*
	 * if the current thread is reused for the work request, it does not return via unix_syscall
	 */
	wq_runreq(p, overcommit, pthread_priority_from_class_index(priclass), th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));

	PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0);

	return (TRUE);

out_of_work:
	/*
	 * we have no work to do or we are fully booked
	 * w/r to running threads...
	 */
no_thread_to_run:
	workqueue_unlock(p);

	if (start_timer)
		workqueue_interval_timer_start(wq);

	PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(thread), start_timer, 2, 0);

	return (FALSE);
parkit:
	/*
	 * this is a workqueue thread with no more
	 * work to do... park it for now
	 */
	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
	tl->th_flags &= ~TH_LIST_RUNNING;

	tl->th_flags |= TH_LIST_BLOCKED;
	TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);

	pthread_kern->thread_sched_call(th_to_park, NULL);

	OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
	wq->wq_thscheduled_count[tl->th_priority]--;
	wq->wq_threads_scheduled--;

	if (tl->th_flags & TH_LIST_CONSTRAINED) {
		wq->wq_constrained_threads_scheduled--;
		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
		tl->th_flags &= ~TH_LIST_CONSTRAINED;
	}
	if (wq->wq_thidlecount < 100)
		us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
	else
		us_to_wait = wq_reduce_pool_window_usecs / 100;

	wq->wq_thidlecount++;
	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
			TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
			wq_reduce_pool_window_usecs, NSEC_PER_USEC);

	workqueue_unlock(p);

	if (start_timer)
		workqueue_interval_timer_start(wq);

	PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park));
	PTHREAD_TRACE(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0);

	thread_block((thread_continue_t)wq_unpark_continue);
	/* NOT REACHED */

	return (FALSE);
}
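
/*
 * Continuation run when a newly created workqueue thread, parked in the
 * suspended state, is resumed (or aborted).  It either re-arms the
 * scheduler callback and returns to user space, or tears the thread down
 * if the process is aborting.
 */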
static void
wq_unsuspend_continue(void)
{
	struct uthread *uth = NULL;
	thread_t th_to_unsuspend;
	struct threadlist *tl;
	proc_t p;

	th_to_unsuspend = current_thread();
	uth = pthread_kern->get_bsdthread_info(th_to_unsuspend);

	if (uth != NULL && (tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {

		if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
			/*
			 * most likely a normal resume of this thread occurred...
			 * it's also possible that the thread was aborted after we
			 * finished setting it up so that it could be dispatched... if
			 * so, thread_bootstrap_return will notice the abort and put
			 * the thread on the path to self-destruction
			 */
normal_resume_to_user:
			pthread_kern->thread_sched_call(th_to_unsuspend, workqueue_callback);
			pthread_kern->thread_bootstrap_return();
		}
		/*
		 * if we get here, it's because we've been resumed due to
		 * an abort of this thread (process is crashing)
		 */
		p = current_proc();

		workqueue_lock_spin(p);

		if (tl->th_flags & TH_LIST_SUSPENDED) {
			/*
			 * thread has been aborted while still on our idle
			 * queue... remove it from our domain...
			 * workqueue_removethread consumes the lock
			 */
			workqueue_removethread(tl, 0);
			pthread_kern->thread_bootstrap_return();
		}
		while ((tl->th_flags & TH_LIST_BUSY)) {
			/*
			 * this thread was aborted after we started making
			 * it runnable, but before we finished dispatching it...
			 * we need to wait for that process to finish,
			 * and we need to ask for a wakeup instead of a
			 * thread_resume since the abort has already resumed us
			 */
			tl->th_flags |= TH_LIST_NEED_WAKEUP;

			assert_wait((caddr_t)tl, (THREAD_UNINT));

			workqueue_unlock(p);
			thread_block(THREAD_CONTINUE_NULL);
			workqueue_lock_spin(p);
		}
		workqueue_unlock(p);
		/*
		 * we have finished setting up the thread's context...
		 * thread_bootstrap_return will take us through the abort path
		 * where the thread will self destruct
		 */
		goto normal_resume_to_user;
	}
	pthread_kern->thread_bootstrap_return();
}
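
/*
 * Continuation run when a parked idle workqueue thread is woken, either by
 * wq_runreq dispatching new work to it or by its idle timeout expiring.
 * A timed-out thread that is still on the idle list destroys itself via
 * workqueue_removethread.
 */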
static void
wq_unpark_continue(void)
{
	struct uthread *uth = NULL;
	struct threadlist *tl;
	thread_t th_to_unpark;
	proc_t p;

	th_to_unpark = current_thread();
	uth = pthread_kern->get_bsdthread_info(th_to_unpark);

	if ((tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {

		if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
			/*
			 * a normal wakeup of this thread occurred... no need
			 * for any synchronization with the timer and wq_runreq
			 */
normal_return_to_user:
			pthread_kern->thread_sched_call(th_to_unpark, workqueue_callback);

			PTHREAD_TRACE(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0);

			pthread_kern->thread_exception_return();
		}
		p = current_proc();

		workqueue_lock_spin(p);

		if ( !(tl->th_flags & TH_LIST_RUNNING)) {
			/*
			 * the timer popped us out and we've not
			 * been moved off of the idle list
			 * so we should now self-destruct
			 *
			 * workqueue_removethread consumes the lock
			 */
			workqueue_removethread(tl, 0);
			pthread_kern->thread_exception_return();
		}
		/*
		 * the timer woke us up, but we have already
		 * started to make this a runnable thread,
		 * but have not yet finished that process...
		 * so wait for the normal wakeup
		 */
		while ((tl->th_flags & TH_LIST_BUSY)) {

			assert_wait((caddr_t)tl, (THREAD_UNINT));

			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);
		}
		/*
		 * we have finished setting up the thread's context
		 * now we can return as if we got a normal wakeup
		 */
		workqueue_unlock(p);

		goto normal_return_to_user;
	}
	pthread_kern->thread_exception_return();
}
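
/*
 * Dispatch a work request on a specific thread: load the user register
 * state via _setup_wqthread, then hand control to the thread by returning
 * directly to user space (when the current thread is being reused), by
 * waking a blocked idle thread, or by resuming a freshly created,
 * suspended thread.
 */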
static void
wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
	  int reuse_thread, int wake_thread, int return_directly)
{
	int ret = 0;
	boolean_t need_resume = FALSE;

	PTHREAD_TRACE1(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, overcommit, priority, thread_tid(current_thread()), thread_tid(th));

	ret = _setup_wqthread(p, th, overcommit, priority, reuse_thread, tl);

	if (ret != 0)
		panic("setup_wqthread failed %x\n", ret);

	if (return_directly) {
		PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);

		pthread_kern->thread_exception_return();
		panic("wq_runreq: thread_exception_return returned ...\n");
	}
	if (wake_thread) {
		workqueue_lock_spin(p);

		tl->th_flags &= ~TH_LIST_BUSY;
		wakeup(tl);

		workqueue_unlock(p);
	} else {
		PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));

		workqueue_lock_spin(p);

		if (tl->th_flags & TH_LIST_NEED_WAKEUP) {
			wakeup(tl);
		} else {
			need_resume = TRUE;
		}

		tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);

		workqueue_unlock(p);

		if (need_resume) {
			/*
			 * need to do this outside of the workqueue spin lock
			 * since thread_resume locks the thread via a full mutex
			 */
			pthread_kern->thread_resume(th);
		}
	}
}
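
/*
 * Populate the register state for a workqueue thread so that it enters
 * user space at the process's wqthread trampoline with its stack pointer,
 * thread port, and dispatch flags in the expected registers.
 */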
int
_setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl)
{
	int error = 0;
	uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI;
	mach_vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);

	if (overcommit) {
		flags |= WQ_FLAG_THREAD_OVERCOMMIT;
	}

	/* Put the QoS class value into the lower bits of the reuse_thread register, this is where
	 * the thread priority used to be stored anyway.
	 */
	flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);

#if defined(__i386__) || defined(__x86_64__)
	int isLP64 = proc_is64bit(p);

	/*
	 * Set up i386 registers & function call.
	 */
	if (isLP64 == 0) {
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (unsigned int)pthread_kern->proc_get_wqthread(p);
		ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
		ts->ebx = (unsigned int)tl->th_thport;
		ts->ecx = (unsigned int)(tl->th_stackaddr + guardsize);
		ts->edx = (unsigned int)0;
		ts->edi = (unsigned int)flags;
		ts->esi = (unsigned int)0;
		/*
		 * set stack pointer
		 */
		ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_32_STK_ALIGN));

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);

	} else {
		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)pthread_kern->proc_get_wqthread(p);
		ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
		ts64->rsi = (uint64_t)(tl->th_thport);
		ts64->rdx = (uint64_t)(tl->th_stackaddr + guardsize);
		ts64->rcx = (uint64_t)0;
		ts64->r8 = (uint64_t)flags;
		ts64->r9 = (uint64_t)0;

		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_64_REDZONE_LEN);

		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
		if (error != KERN_SUCCESS) {
			error = EINVAL;
		}
	}
#else
#error setup_wqthread not defined for this architecture
#endif

	return error;
}
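
/*
 * Fill in a proc_workqueueinfo snapshot (thread counts and limit flags)
 * for the proc_info interface.
 */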
int
_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
{
	struct workqueue *wq;
	int error = 0;
	int activecount;
	uint32_t pri;

	workqueue_lock_spin(p);
	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
		error = EINVAL;
		goto out;
	}
	activecount = 0;

	for (pri = 0; pri < WORKQUEUE_NUM_BUCKETS; pri++) {
		activecount += wq->wq_thactive_count[pri];
	}
	pwqinfo->pwq_nthreads = wq->wq_nthreads;
	pwqinfo->pwq_runthreads = activecount;
	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
	pwqinfo->pwq_state = 0;

	if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

out:
	workqueue_unlock(p);
	return (error);
}
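
/*
 * Return the 64-bit thread ID of the calling thread.
 */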
int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
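
/*
 * One-time initialization: allocate the lock group and attributes used by
 * pthread synchronizers and the workqueue code, set up the psynch hash and
 * its cleanup thread call, and register the workqueue tuning sysctls.
 */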
void
_pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();

	_workqueue_init_lock((proc_t)get_bsdtask_info(kernel_task));
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);

	/*
	 * register the workqueue tuning sysctls
	 */
	sysctl_register_oid(&sysctl__kern_wq_yielded_threshold);
	sysctl_register_oid(&sysctl__kern_wq_yielded_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_threads);
	sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
	sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
}