/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#pragma mark - Front Matter

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/cdefs.h>

// <rdar://problem/26158937> panic() should be marked noreturn
extern void panic(const char *string, ...) __printflike(1,2) __dead2;
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>

#include <sys/kernel.h>

#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <sys/kdebug.h>
//#include <sys/sysproto.h>

#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */

#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#include <mach/shared_region.h>

#include <libkern/OSAtomic.h>
#include <libkern/libkern.h>

#include <sys/pthread_shims.h>
#include "kern_internal.h"
// XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE
#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))

// XXX: Ditto for thread tags from kern/thread.h
#define	THREAD_TAG_MAINTHREAD 0x1
#define	THREAD_TAG_PTHREAD 0x10
#define	THREAD_TAG_WORKQUEUE 0x20
lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;

zone_t pthread_zone_workqueue;
zone_t pthread_zone_threadlist;
zone_t pthread_zone_threadreq;

extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern void workqueue_thread_yielded(void);
#define WQ_SETUP_FIRST_USE  1
#define WQ_SETUP_CLEAR_VOUCHER  2
static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
		struct threadlist *tl, int flags);

static void reset_priority(struct threadlist *tl, pthread_priority_t pri);
static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index);

static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2;

static bool workqueue_addnewthread(proc_t p, struct workqueue *wq);
static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use);
static void workqueue_lock_spin(struct workqueue *);
static void workqueue_unlock(struct workqueue *);

#define WQ_RUN_TR_THROTTLED 0
#define WQ_RUN_TR_THREAD_NEEDED 1
#define WQ_RUN_TR_THREAD_STARTED 2
#define WQ_RUN_TR_EXITING 3
static int workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
		struct threadlist *tl, struct threadreq *req, bool may_add_new_thread);

static bool may_start_constrained_thread(struct workqueue *wq,
		uint32_t at_priclass, struct threadlist *tl, bool may_start_timer);

static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap);
static boolean_t wq_thread_is_busy(uint64_t cur_ts,
		_Atomic uint64_t *lastblocked_tsp);

int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);

#define WQ_MAXPRI_MIN	0	/* low prio queue num */
#define WQ_MAXPRI_MAX	2	/* max prio queuenum */
#define WQ_PRI_NUM	3	/* number of prio work queues */

#define C_32_STK_ALIGN          16
#define C_64_STK_ALIGN          16
#define C_64_REDZONE_LEN        128

#define PTHREAD_T_OFFSET 0
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
#define PTHREAD_START_CUSTOM		0x01000000
#define PTHREAD_START_SETSCHED		0x02000000
#define PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_TSD_BASE_SET	0x10000000
#define PTHREAD_START_QOSCLASS_MASK	0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff
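
/*
 * Illustrative sketch (not part of the original source): how a flags word laid
 * out as in the diagram above decomposes, mirroring the masking done later in
 * _bsdthread_create(). The helper name is hypothetical.
 */
static inline void
_example_decode_bsdthread_flags(uint32_t flags, unsigned int *policy,
		unsigned int *importance)
{
	// low 16 bits: importance; next 8 bits: scheduling policy
	*policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
	*importance = flags & PTHREAD_START_IMPORTANCE_MASK;
	// the top byte carries the PTHREAD_START_* option bits themselves
}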
#define SCHED_OTHER      POLICY_TIMESHARE
#define SCHED_FIFO       POLICY_FIFO
#define SCHED_RR         POLICY_RR

#define BASEPRI_DEFAULT 31
static uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
static uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
static uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
static uint32_t wq_max_threads		= WORKQUEUE_MAXTHREADS;
static uint32_t wq_max_constrained_threads	= WORKQUEUE_MAXTHREADS / 8;
static uint32_t wq_max_concurrency[WORKQUEUE_NUM_BUCKETS + 1]; // set to ncpus on load
SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
		&wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
		&wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
		&wq_max_timer_interval_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
		&wq_max_threads, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
		&wq_max_constrained_threads, 0, "");

static int wq_kevent_test SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test,
		CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE,
		NULL, 0, wq_kevent_test, 0, "-");

static uint32_t wq_init_constrained_limit = 1;

uint32_t pthread_debug_tracing = 1;

SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
		&pthread_debug_tracing, 0, "");

static uint32_t pthread_mutex_default_policy;

SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
		&pthread_mutex_default_policy, 0, "");
/*
 *  +-----+-----+-----+-----+-----+-----+-----+-----+
 *  |     | MT  | BG  | UT  | DE  | IN  | UN  | mgr |
 *  +-----+-----+-----+-----+-----+-----+-----+-----+
 *  | pri |  5  |  4  |  3  |  2  |  1  |  0  |  6  |
 *  | qos |  1  |  2  |  3  |  4  |  5  |  6  |  7  |
 *  +-----+-----+-----+-----+-----+-----+-----+-----+
 */
static inline uint32_t
_wq_bucket_to_thread_qos(int pri)
{
	if (pri == WORKQUEUE_EVENT_MANAGER_BUCKET) {
		return WORKQUEUE_EVENT_MANAGER_BUCKET + 1;
	}
	return WORKQUEUE_EVENT_MANAGER_BUCKET - pri;
}
#pragma mark wq_thactive

#if defined(__LP64__)
// 7 * 16 bits for each QoS bucket request count (including manager)
// 3 bits of best QoS among all pending constrained requests
#define WQ_THACTIVE_BUCKET_WIDTH 16
#define WQ_THACTIVE_QOS_SHIFT    (7 * WQ_THACTIVE_BUCKET_WIDTH)
#else
// 6 * 10 bits for each QoS bucket request count (except manager)
// 1 bit for the manager bucket
// 3 bits of best QoS among all pending constrained requests
#define WQ_THACTIVE_BUCKET_WIDTH 10
#define WQ_THACTIVE_QOS_SHIFT    (6 * WQ_THACTIVE_BUCKET_WIDTH + 1)
#endif
#define WQ_THACTIVE_BUCKET_MASK  ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)
#define WQ_THACTIVE_BUCKET_HALF  (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1))
#define WQ_THACTIVE_NO_PENDING_REQUEST 6

_Static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3,
		"Make sure we have space to encode a QoS");
static inline wq_thactive_t
_wq_thactive_fetch_and_add(struct workqueue *wq, wq_thactive_t offset)
{
#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
	return atomic_fetch_add_explicit(&wq->wq_thactive, offset,
			memory_order_relaxed);
#else
	return pthread_kern->atomic_fetch_add_128_relaxed(&wq->wq_thactive, offset);
#endif
}
static inline wq_thactive_t
_wq_thactive(struct workqueue *wq)
{
#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
	return atomic_load_explicit(&wq->wq_thactive, memory_order_relaxed);
#else
	return pthread_kern->atomic_load_128_relaxed(&wq->wq_thactive);
#endif
}
#define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \
		((tha) >> WQ_THACTIVE_QOS_SHIFT)

static inline uint32_t
_wq_thactive_best_constrained_req_qos(struct workqueue *wq)
{
	// Avoid expensive atomic operations: the three bits we're loading are in
	// a single byte, and always updated under the workqueue lock
	wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive;
	return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v);
}
static inline wq_thactive_t
_wq_thactive_set_best_constrained_req_qos(struct workqueue *wq,
		uint32_t orig_qos, uint32_t new_qos)
{
	wq_thactive_t v;
	v = (wq_thactive_t)(new_qos - orig_qos) << WQ_THACTIVE_QOS_SHIFT;
	/*
	 * We can do an atomic add relative to the initial load because updates
	 * to this qos are always serialized under the workqueue lock.
	 */
	return _wq_thactive_fetch_and_add(wq, v) + v;
}
static inline wq_thactive_t
_wq_thactive_offset_for_qos(int qos)
{
	return (wq_thactive_t)1 << (qos * WQ_THACTIVE_BUCKET_WIDTH);
}

static inline wq_thactive_t
_wq_thactive_inc(struct workqueue *wq, int qos)
{
	return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(qos));
}

static inline wq_thactive_t
_wq_thactive_dec(struct workqueue *wq, int qos)
{
	return _wq_thactive_fetch_and_add(wq, -_wq_thactive_offset_for_qos(qos));
}

static inline wq_thactive_t
_wq_thactive_move(struct workqueue *wq, int oldqos, int newqos)
{
	return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(newqos) -
			_wq_thactive_offset_for_qos(oldqos));
}
static inline uint32_t
_wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v,
		int qos, uint32_t *busycount, uint32_t *max_busycount)
{
	uint32_t count = 0, active;
	uint64_t curtime;

#ifndef __LP64__
	/*
	 * on 32bits the manager bucket is a single bit and the best constrained
	 * request QoS 3 bits are where the 10 bits of a regular QoS bucket count
	 * would be. Mask them out.
	 */
	v &= ~(~0ull << WQ_THACTIVE_QOS_SHIFT);
#endif
	if (busycount) {
		curtime = mach_absolute_time();
		*busycount = 0;
	}
	if (max_busycount) {
		*max_busycount = qos + 1;
	}
	for (int i = 0; i <= qos; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) {
		active = v & WQ_THACTIVE_BUCKET_MASK;
		count += active;
		if (busycount && wq->wq_thscheduled_count[i] > active) {
			if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) {
				/*
				 * We only consider the last blocked thread for a given bucket
				 * as busy because we don't want to take the list lock in each
				 * sched callback. However this is an approximation that could
				 * contribute to thread creation storms.
				 */
				(*busycount)++;
			}
		}
	}
	return count;
}
#pragma mark - Process/Thread Setup/Teardown syscalls

static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);

	// We can't safely take random values % something unless its a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif
	return stackaddr;
}
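
/*
 * Illustrative sketch (not part of the original source): the ASLR offsets above
 * are truncated to a page boundary before use. Assuming vm_map_page_mask()
 * returns (page_size - 1), that truncation amounts to clearing the low bits,
 * which this hypothetical helper makes explicit.
 */
static inline mach_vm_offset_t
_example_trunc_to_page(mach_vm_offset_t offset, vm_map_offset_t page_mask)
{
	// assumed equivalent of vm_map_trunc_page_mask(offset, page_mask)
	return offset & ~(mach_vm_offset_t)page_mask;
}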
449 * bsdthread_create system call. Used by pthread_create.
452 _bsdthread_create(struct proc
*p
, user_addr_t user_func
, user_addr_t user_funcarg
, user_addr_t user_stack
, user_addr_t user_pthread
, uint32_t flags
, user_addr_t
*retval
)
458 mach_vm_offset_t stackaddr
;
459 mach_vm_size_t th_allocsize
= 0;
460 mach_vm_size_t th_guardsize
;
461 mach_vm_offset_t th_stack
;
462 mach_vm_offset_t th_pthread
;
463 mach_vm_offset_t th_tsd_base
;
464 mach_port_name_t th_thport
;
466 vm_map_t vmap
= pthread_kern
->current_map();
467 task_t ctask
= current_task();
468 unsigned int policy
, importance
;
473 if (pthread_kern
->proc_get_register(p
) == 0) {
477 PTHREAD_TRACE(TRACE_pthread_thread_create
| DBG_FUNC_START
, flags
, 0, 0, 0, 0);
479 isLP64
= proc_is64bit(p
);
480 th_guardsize
= vm_map_page_size(vmap
);
482 stackaddr
= pthread_kern
->proc_get_stack_addr_hint(p
);
483 kret
= pthread_kern
->thread_create(ctask
, &th
);
484 if (kret
!= KERN_SUCCESS
)
486 thread_reference(th
);
488 pthread_kern
->thread_set_tag(th
, THREAD_TAG_PTHREAD
);
490 sright
= (void *)pthread_kern
->convert_thread_to_port(th
);
491 th_thport
= pthread_kern
->ipc_port_copyout_send(sright
, pthread_kern
->task_get_ipcspace(ctask
));
492 if (!MACH_PORT_VALID(th_thport
)) {
493 error
= EMFILE
; // userland will convert this into a crash
497 if ((flags
& PTHREAD_START_CUSTOM
) == 0) {
498 mach_vm_size_t pthread_size
=
499 vm_map_round_page_mask(pthread_kern
->proc_get_pthsize(p
) + PTHREAD_T_OFFSET
, vm_map_page_mask(vmap
));
500 th_allocsize
= th_guardsize
+ user_stack
+ pthread_size
;
501 user_stack
+= PTHREAD_T_OFFSET
;
503 kret
= mach_vm_map(vmap
, &stackaddr
,
506 VM_MAKE_TAG(VM_MEMORY_STACK
)| VM_FLAGS_ANYWHERE
, NULL
,
507 0, FALSE
, VM_PROT_DEFAULT
, VM_PROT_ALL
,
509 if (kret
!= KERN_SUCCESS
){
510 kret
= mach_vm_allocate(vmap
,
511 &stackaddr
, th_allocsize
,
512 VM_MAKE_TAG(VM_MEMORY_STACK
)| VM_FLAGS_ANYWHERE
);
514 if (kret
!= KERN_SUCCESS
) {
519 PTHREAD_TRACE(TRACE_pthread_thread_create
|DBG_FUNC_NONE
, th_allocsize
, stackaddr
, 0, 2, 0);
523 * The guard page is at the lowest address
524 * The stack base is the highest address
526 kret
= mach_vm_protect(vmap
, stackaddr
, th_guardsize
, FALSE
, VM_PROT_NONE
);
528 if (kret
!= KERN_SUCCESS
) {
533 th_pthread
= stackaddr
+ th_guardsize
+ user_stack
;
534 th_stack
= th_pthread
;
537 * Pre-fault the first page of the new thread's stack and the page that will
538 * contain the pthread_t structure.
540 if (vm_map_trunc_page_mask((vm_map_offset_t
)(th_stack
- C_64_REDZONE_LEN
), vm_map_page_mask(vmap
)) !=
541 vm_map_trunc_page_mask((vm_map_offset_t
)th_pthread
, vm_map_page_mask(vmap
))){
543 vm_map_trunc_page_mask((vm_map_offset_t
)(th_stack
- C_64_REDZONE_LEN
), vm_map_page_mask(vmap
)),
544 VM_PROT_READ
| VM_PROT_WRITE
,
546 THREAD_UNINT
, NULL
, 0);
550 vm_map_trunc_page_mask((vm_map_offset_t
)th_pthread
, vm_map_page_mask(vmap
)),
551 VM_PROT_READ
| VM_PROT_WRITE
,
553 THREAD_UNINT
, NULL
, 0);
556 th_stack
= user_stack
;
557 th_pthread
= user_pthread
;
559 PTHREAD_TRACE(TRACE_pthread_thread_create
|DBG_FUNC_NONE
, 0, 0, 0, 3, 0);
562 tsd_offset
= pthread_kern
->proc_get_pthread_tsd_offset(p
);
564 th_tsd_base
= th_pthread
+ tsd_offset
;
565 kret
= pthread_kern
->thread_set_tsd_base(th
, th_tsd_base
);
566 if (kret
== KERN_SUCCESS
) {
567 flags
|= PTHREAD_START_TSD_BASE_SET
;
571 #if defined(__i386__) || defined(__x86_64__)
573 * Set up i386 registers & function call.
576 x86_thread_state32_t state
= {
577 .eip
= (unsigned int)pthread_kern
->proc_get_threadstart(p
),
578 .eax
= (unsigned int)th_pthread
,
579 .ebx
= (unsigned int)th_thport
,
580 .ecx
= (unsigned int)user_func
,
581 .edx
= (unsigned int)user_funcarg
,
582 .edi
= (unsigned int)user_stack
,
583 .esi
= (unsigned int)flags
,
587 .esp
= (int)((vm_offset_t
)(th_stack
-C_32_STK_ALIGN
))
590 error
= pthread_kern
->thread_set_wq_state32(th
, (thread_state_t
)&state
);
591 if (error
!= KERN_SUCCESS
) {
596 x86_thread_state64_t state64
= {
597 .rip
= (uint64_t)pthread_kern
->proc_get_threadstart(p
),
598 .rdi
= (uint64_t)th_pthread
,
599 .rsi
= (uint64_t)(th_thport
),
600 .rdx
= (uint64_t)user_func
,
601 .rcx
= (uint64_t)user_funcarg
,
602 .r8
= (uint64_t)user_stack
,
603 .r9
= (uint64_t)flags
,
605 * set stack pointer aligned to 16 byte boundary
607 .rsp
= (uint64_t)(th_stack
- C_64_REDZONE_LEN
)
610 error
= pthread_kern
->thread_set_wq_state64(th
, (thread_state_t
)&state64
);
611 if (error
!= KERN_SUCCESS
) {
617 #elif defined(__arm__)
618 arm_thread_state_t state
= {
619 .pc
= (int)pthread_kern
->proc_get_threadstart(p
),
620 .r
[0] = (unsigned int)th_pthread
,
621 .r
[1] = (unsigned int)th_thport
,
622 .r
[2] = (unsigned int)user_func
,
623 .r
[3] = (unsigned int)user_funcarg
,
624 .r
[4] = (unsigned int)user_stack
,
625 .r
[5] = (unsigned int)flags
,
627 /* Set r7 & lr to 0 for better back tracing */
634 .sp
= (int)((vm_offset_t
)(th_stack
-C_32_STK_ALIGN
))
637 (void) pthread_kern
->thread_set_wq_state32(th
, (thread_state_t
)&state
);
640 #error bsdthread_create not defined for this architecture
643 if ((flags
& PTHREAD_START_SETSCHED
) != 0) {
644 /* Set scheduling parameters if needed */
645 thread_extended_policy_data_t extinfo
;
646 thread_precedence_policy_data_t precedinfo
;
648 importance
= (flags
& PTHREAD_START_IMPORTANCE_MASK
);
649 policy
= (flags
>> PTHREAD_START_POLICY_BITSHIFT
) & PTHREAD_START_POLICY_MASK
;
651 if (policy
== SCHED_OTHER
) {
652 extinfo
.timeshare
= 1;
654 extinfo
.timeshare
= 0;
657 thread_policy_set(th
, THREAD_EXTENDED_POLICY
, (thread_policy_t
)&extinfo
, THREAD_EXTENDED_POLICY_COUNT
);
659 precedinfo
.importance
= (importance
- BASEPRI_DEFAULT
);
660 thread_policy_set(th
, THREAD_PRECEDENCE_POLICY
, (thread_policy_t
)&precedinfo
, THREAD_PRECEDENCE_POLICY_COUNT
);
661 } else if ((flags
& PTHREAD_START_QOSCLASS
) != 0) {
662 /* Set thread QoS class if requested. */
663 pthread_priority_t priority
= (pthread_priority_t
)(flags
& PTHREAD_START_QOSCLASS_MASK
);
665 thread_qos_policy_data_t qos
;
666 qos
.qos_tier
= pthread_priority_get_thread_qos(priority
);
667 qos
.tier_importance
= (qos
.qos_tier
== QOS_CLASS_UNSPECIFIED
) ? 0 :
668 _pthread_priority_get_relpri(priority
);
670 pthread_kern
->thread_policy_set_internal(th
, THREAD_QOS_POLICY
, (thread_policy_t
)&qos
, THREAD_QOS_POLICY_COUNT
);
673 if (pthread_kern
->proc_get_mach_thread_self_tsd_offset
) {
674 uint64_t mach_thread_self_offset
=
675 pthread_kern
->proc_get_mach_thread_self_tsd_offset(p
);
676 if (mach_thread_self_offset
&& tsd_offset
) {
677 bool proc64bit
= proc_is64bit(p
);
679 uint64_t th_thport_tsd
= (uint64_t)th_thport
;
680 error
= copyout(&th_thport_tsd
, th_pthread
+ tsd_offset
+
681 mach_thread_self_offset
, sizeof(th_thport_tsd
));
683 uint32_t th_thport_tsd
= (uint32_t)th_thport
;
684 error
= copyout(&th_thport_tsd
, th_pthread
+ tsd_offset
+
685 mach_thread_self_offset
, sizeof(th_thport_tsd
));
693 kret
= pthread_kern
->thread_resume(th
);
694 if (kret
!= KERN_SUCCESS
) {
698 thread_deallocate(th
); /* drop the creator reference */
700 PTHREAD_TRACE(TRACE_pthread_thread_create
|DBG_FUNC_END
, error
, th_pthread
, 0, 0, 0);
702 // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms
703 *retval
= (user_addr_t
)th_pthread
;
708 if (allocated
!= 0) {
709 (void)mach_vm_deallocate(vmap
, stackaddr
, th_allocsize
);
712 (void)pthread_kern
->mach_port_deallocate(pthread_kern
->task_get_ipcspace(ctask
), th_thport
);
713 if (pthread_kern
->thread_will_park_or_terminate
) {
714 pthread_kern
->thread_will_park_or_terminate(th
);
716 (void)thread_terminate(th
);
717 (void)thread_deallocate(th
);
722 * bsdthread_terminate system call. Used by pthread_terminate
725 _bsdthread_terminate(__unused
struct proc
*p
,
726 user_addr_t stackaddr
,
730 __unused
int32_t *retval
)
732 mach_vm_offset_t freeaddr
;
733 mach_vm_size_t freesize
;
735 thread_t th
= current_thread();
737 freeaddr
= (mach_vm_offset_t
)stackaddr
;
740 PTHREAD_TRACE(TRACE_pthread_thread_terminate
|DBG_FUNC_START
, freeaddr
, freesize
, kthport
, 0xff, 0);
742 if ((freesize
!= (mach_vm_size_t
)0) && (freeaddr
!= (mach_vm_offset_t
)0)) {
743 if (pthread_kern
->thread_get_tag(th
) & THREAD_TAG_MAINTHREAD
){
744 vm_map_t user_map
= pthread_kern
->current_map();
745 freesize
= vm_map_trunc_page_mask((vm_map_offset_t
)freesize
- 1, vm_map_page_mask(user_map
));
746 kret
= mach_vm_behavior_set(user_map
, freeaddr
, freesize
, VM_BEHAVIOR_REUSABLE
);
747 assert(kret
== KERN_SUCCESS
|| kret
== KERN_INVALID_ADDRESS
);
748 kret
= kret
? kret
: mach_vm_protect(user_map
, freeaddr
, freesize
, FALSE
, VM_PROT_NONE
);
749 assert(kret
== KERN_SUCCESS
|| kret
== KERN_INVALID_ADDRESS
);
751 kret
= mach_vm_deallocate(pthread_kern
->current_map(), freeaddr
, freesize
);
752 if (kret
!= KERN_SUCCESS
) {
753 PTHREAD_TRACE(TRACE_pthread_thread_terminate
|DBG_FUNC_END
, kret
, 0, 0, 0, 0);
759 if (pthread_kern
->thread_will_park_or_terminate
) {
760 pthread_kern
->thread_will_park_or_terminate(th
);
762 (void)thread_terminate(th
);
763 if (sem
!= MACH_PORT_NULL
) {
764 kret
= pthread_kern
->semaphore_signal_internal_trap(sem
);
765 if (kret
!= KERN_SUCCESS
) {
766 PTHREAD_TRACE(TRACE_pthread_thread_terminate
|DBG_FUNC_END
, kret
, 0, 0, 0, 0);
771 if (kthport
!= MACH_PORT_NULL
) {
772 pthread_kern
->mach_port_deallocate(pthread_kern
->task_get_ipcspace(current_task()), kthport
);
775 PTHREAD_TRACE(TRACE_pthread_thread_terminate
|DBG_FUNC_END
, 0, 0, 0, 0, 0);
777 pthread_kern
->thread_exception_return();
778 panic("bsdthread_terminate: still running\n");
780 PTHREAD_TRACE(TRACE_pthread_thread_terminate
|DBG_FUNC_END
, 0, 0xff, 0, 0, 0);
786 * bsdthread_register system call. Performs per-process setup. Responsible for
787 * returning capabilitiy bits to userspace and receiving userspace function addresses.
790 _bsdthread_register(struct proc
*p
,
791 user_addr_t threadstart
,
792 user_addr_t wqthread
,
794 user_addr_t pthread_init_data
,
795 user_addr_t pthread_init_data_size
,
796 uint64_t dispatchqueue_offset
,
799 struct _pthread_registration_data data
= {};
800 uint32_t max_tsd_offset
;
802 size_t pthread_init_sz
= 0;
804 /* syscall randomizer test can pass bogus values */
805 if (pthsize
< 0 || pthsize
> MAX_PTHREAD_SIZE
) {
809 * if we have pthread_init_data, then we use that and target_concptr
810 * (which is an offset) get data.
812 if (pthread_init_data
!= 0) {
813 if (pthread_init_data_size
< sizeof(data
.version
)) {
816 pthread_init_sz
= MIN(sizeof(data
), (size_t)pthread_init_data_size
);
817 int ret
= copyin(pthread_init_data
, &data
, pthread_init_sz
);
821 if (data
.version
!= (size_t)pthread_init_data_size
) {
825 data
.dispatch_queue_offset
= dispatchqueue_offset
;
828 /* We have to do this before proc_get_register so that it resets after fork */
829 mach_vm_offset_t stackaddr
= stack_addr_hint(p
, pthread_kern
->current_map());
830 pthread_kern
->proc_set_stack_addr_hint(p
, (user_addr_t
)stackaddr
);
832 /* prevent multiple registrations */
833 if (pthread_kern
->proc_get_register(p
) != 0) {
837 pthread_kern
->proc_set_threadstart(p
, threadstart
);
838 pthread_kern
->proc_set_wqthread(p
, wqthread
);
839 pthread_kern
->proc_set_pthsize(p
, pthsize
);
840 pthread_kern
->proc_set_register(p
);
842 uint32_t tsd_slot_sz
= proc_is64bit(p
) ? sizeof(uint64_t) : sizeof(uint32_t);
843 if ((uint32_t)pthsize
>= tsd_slot_sz
&&
844 data
.tsd_offset
<= (uint32_t)(pthsize
- tsd_slot_sz
)) {
845 max_tsd_offset
= ((uint32_t)pthsize
- data
.tsd_offset
- tsd_slot_sz
);
850 pthread_kern
->proc_set_pthread_tsd_offset(p
, data
.tsd_offset
);
852 if (data
.dispatch_queue_offset
> max_tsd_offset
) {
853 data
.dispatch_queue_offset
= 0;
855 pthread_kern
->proc_set_dispatchqueue_offset(p
, data
.dispatch_queue_offset
);
857 if (pthread_kern
->proc_set_return_to_kernel_offset
) {
858 if (data
.return_to_kernel_offset
> max_tsd_offset
) {
859 data
.return_to_kernel_offset
= 0;
861 pthread_kern
->proc_set_return_to_kernel_offset(p
,
862 data
.return_to_kernel_offset
);
865 if (pthread_kern
->proc_set_mach_thread_self_tsd_offset
) {
866 if (data
.mach_thread_self_offset
> max_tsd_offset
) {
867 data
.mach_thread_self_offset
= 0;
869 pthread_kern
->proc_set_mach_thread_self_tsd_offset(p
,
870 data
.mach_thread_self_offset
);
873 if (pthread_init_data
!= 0) {
874 /* Outgoing data that userspace expects as a reply */
875 data
.version
= sizeof(struct _pthread_registration_data
);
876 if (pthread_kern
->qos_main_thread_active()) {
877 mach_msg_type_number_t nqos
= THREAD_QOS_POLICY_COUNT
;
878 thread_qos_policy_data_t qos
;
879 boolean_t gd
= FALSE
;
881 kr
= pthread_kern
->thread_policy_get(current_thread(), THREAD_QOS_POLICY
, (thread_policy_t
)&qos
, &nqos
, &gd
);
882 if (kr
!= KERN_SUCCESS
|| qos
.qos_tier
== THREAD_QOS_UNSPECIFIED
) {
883 /* Unspecified threads means the kernel wants us to impose legacy upon the thread. */
884 qos
.qos_tier
= THREAD_QOS_LEGACY
;
885 qos
.tier_importance
= 0;
887 kr
= pthread_kern
->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY
, (thread_policy_t
)&qos
, THREAD_QOS_POLICY_COUNT
);
890 if (kr
== KERN_SUCCESS
) {
891 data
.main_qos
= thread_qos_get_pthread_priority(qos
.qos_tier
);
893 data
.main_qos
= _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED
, 0, 0);
896 data
.main_qos
= _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED
, 0, 0);
899 data
.mutex_default_policy
= pthread_mutex_default_policy
;
901 kr
= copyout(&data
, pthread_init_data
, pthread_init_sz
);
902 if (kr
!= KERN_SUCCESS
) {
907 /* return the supported feature set as the return value. */
908 *retval
= PTHREAD_FEATURE_SUPPORTED
;
913 #pragma mark - QoS Manipulation
916 _bsdthread_ctl_set_qos(struct proc
*p
, user_addr_t __unused cmd
, mach_port_name_t kport
, user_addr_t tsd_priority_addr
, user_addr_t arg3
, int *retval
)
921 pthread_priority_t priority
;
923 /* Unused parameters must be zero. */
928 /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
929 if (proc_is64bit(p
)) {
931 rv
= copyin(tsd_priority_addr
, &v
, sizeof(v
));
933 priority
= (int)(v
& 0xffffffff);
936 rv
= copyin(tsd_priority_addr
, &v
, sizeof(v
));
941 if ((th
= port_name_to_thread(kport
)) == THREAD_NULL
) {
945 /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
946 if (th
!= current_thread()) {
947 thread_deallocate(th
);
951 rv
= _bsdthread_ctl_set_self(p
, 0, priority
, 0, _PTHREAD_SET_SELF_QOS_FLAG
, retval
);
953 /* Static param the thread, we just set QoS on it, so its stuck in QoS land now. */
954 /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details
956 thread_deallocate(th
);
962 static inline struct threadlist
*
963 util_get_thread_threadlist_entry(thread_t th
)
965 struct uthread
*uth
= pthread_kern
->get_bsdthread_info(th
);
967 struct threadlist
*tl
= pthread_kern
->uthread_get_threadlist(uth
);
974 _workq_thread_has_been_unbound(thread_t th
, int qos_class
)
976 struct threadlist
*tl
= util_get_thread_threadlist_entry(th
);
981 struct workqueue
*wq
= tl
->th_workq
;
982 workqueue_lock_spin(wq
);
984 if (tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
) {
986 } else if (qos_class
!= class_index_get_thread_qos(tl
->th_priority
)) {
990 if ((tl
->th_flags
& TH_LIST_KEVENT_BOUND
)){
993 tl
->th_flags
&= ~TH_LIST_KEVENT_BOUND
;
995 workqueue_unlock(wq
);
999 workqueue_unlock(wq
);
1004 _bsdthread_ctl_set_self(struct proc
*p
, user_addr_t __unused cmd
, pthread_priority_t priority
, mach_port_name_t voucher
, _pthread_set_flags_t flags
, int __unused
*retval
)
1006 thread_qos_policy_data_t qos
;
1007 mach_msg_type_number_t nqos
= THREAD_QOS_POLICY_COUNT
;
1008 boolean_t gd
= FALSE
;
1009 thread_t th
= current_thread();
1010 struct workqueue
*wq
= NULL
;
1011 struct threadlist
*tl
= NULL
;
1014 int qos_rv
= 0, voucher_rv
= 0, fixedpri_rv
= 0;
1016 if ((flags
& _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND
) != 0) {
1017 tl
= util_get_thread_threadlist_entry(th
);
1024 workqueue_lock_spin(wq
);
1025 if (tl
->th_flags
& TH_LIST_KEVENT_BOUND
) {
1026 tl
->th_flags
&= ~TH_LIST_KEVENT_BOUND
;
1027 unsigned int kevent_flags
= KEVENT_FLAG_WORKQ
| KEVENT_FLAG_UNBIND_CHECK_FLAGS
;
1028 if (tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
) {
1029 kevent_flags
|= KEVENT_FLAG_WORKQ_MANAGER
;
1032 workqueue_unlock(wq
);
1033 __assert_only
int ret
= kevent_qos_internal_unbind(p
, class_index_get_thread_qos(tl
->th_priority
), th
, kevent_flags
);
1036 workqueue_unlock(wq
);
1041 if ((flags
& _PTHREAD_SET_SELF_QOS_FLAG
) != 0) {
1042 kr
= pthread_kern
->thread_policy_get(th
, THREAD_QOS_POLICY
, (thread_policy_t
)&qos
, &nqos
, &gd
);
1043 if (kr
!= KERN_SUCCESS
) {
1049 * If we have main-thread QoS then we don't allow a thread to come out
1050 * of QOS_CLASS_UNSPECIFIED.
1052 if (pthread_kern
->qos_main_thread_active() && qos
.qos_tier
==
1053 THREAD_QOS_UNSPECIFIED
) {
1059 tl
= util_get_thread_threadlist_entry(th
);
1060 if (tl
) wq
= tl
->th_workq
;
1063 PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self
| DBG_FUNC_START
, wq
, qos
.qos_tier
, qos
.tier_importance
, 0, 0);
1065 qos
.qos_tier
= pthread_priority_get_thread_qos(priority
);
1066 qos
.tier_importance
= (qos
.qos_tier
== QOS_CLASS_UNSPECIFIED
) ? 0 : _pthread_priority_get_relpri(priority
);
1068 if (qos
.qos_tier
== QOS_CLASS_UNSPECIFIED
||
1069 qos
.tier_importance
> 0 || qos
.tier_importance
< THREAD_QOS_MIN_TIER_IMPORTANCE
) {
1075 * If we're a workqueue, the threadlist item priority needs adjusting,
1076 * along with the bucket we were running in.
1079 bool try_run_threadreq
= false;
1081 workqueue_lock_spin(wq
);
1082 kr
= pthread_kern
->thread_set_workq_qos(th
, qos
.qos_tier
, qos
.tier_importance
);
1083 assert(kr
== KERN_SUCCESS
|| kr
== KERN_TERMINATED
);
1085 /* Fix up counters. */
1086 uint8_t old_bucket
= tl
->th_priority
;
1087 uint8_t new_bucket
= pthread_priority_get_class_index(priority
);
1089 if (old_bucket
!= new_bucket
) {
1090 _wq_thactive_move(wq
, old_bucket
, new_bucket
);
1091 wq
->wq_thscheduled_count
[old_bucket
]--;
1092 wq
->wq_thscheduled_count
[new_bucket
]++;
1093 if (old_bucket
== WORKQUEUE_EVENT_MANAGER_BUCKET
||
1094 old_bucket
< new_bucket
) {
1096 * if the QoS of the thread was lowered, then this could
1097 * allow for a higher QoS thread request to run, so we need
1100 try_run_threadreq
= true;
1102 tl
->th_priority
= new_bucket
;
1105 bool old_overcommit
= !(tl
->th_flags
& TH_LIST_CONSTRAINED
);
1106 bool new_overcommit
= priority
& _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
;
1107 if (!old_overcommit
&& new_overcommit
) {
1108 if (wq
->wq_constrained_threads_scheduled
-- ==
1109 wq_max_constrained_threads
) {
1110 try_run_threadreq
= true;
1112 tl
->th_flags
&= ~TH_LIST_CONSTRAINED
;
1113 } else if (old_overcommit
&& !new_overcommit
) {
1114 wq
->wq_constrained_threads_scheduled
++;
1115 tl
->th_flags
|= TH_LIST_CONSTRAINED
;
1118 if (try_run_threadreq
) {
1119 workqueue_run_threadreq_and_unlock(p
, wq
, NULL
, NULL
, true);
1121 workqueue_unlock(wq
);
1124 kr
= pthread_kern
->thread_policy_set_internal(th
, THREAD_QOS_POLICY
, (thread_policy_t
)&qos
, THREAD_QOS_POLICY_COUNT
);
1125 if (kr
!= KERN_SUCCESS
) {
1130 PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self
| DBG_FUNC_END
, wq
, qos
.qos_tier
, qos
.tier_importance
, 0, 0);
1134 if ((flags
& _PTHREAD_SET_SELF_VOUCHER_FLAG
) != 0) {
1135 kr
= pthread_kern
->thread_set_voucher_name(voucher
);
1136 if (kr
!= KERN_SUCCESS
) {
1137 voucher_rv
= ENOENT
;
1143 if (qos_rv
) goto done
;
1144 if ((flags
& _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG
) != 0) {
1145 thread_extended_policy_data_t extpol
= {.timeshare
= 0};
1147 if (!tl
) tl
= util_get_thread_threadlist_entry(th
);
1149 /* Not allowed on workqueue threads */
1150 fixedpri_rv
= ENOTSUP
;
1154 kr
= pthread_kern
->thread_policy_set_internal(th
, THREAD_EXTENDED_POLICY
, (thread_policy_t
)&extpol
, THREAD_EXTENDED_POLICY_COUNT
);
1155 if (kr
!= KERN_SUCCESS
) {
1156 fixedpri_rv
= EINVAL
;
1159 } else if ((flags
& _PTHREAD_SET_SELF_TIMESHARE_FLAG
) != 0) {
1160 thread_extended_policy_data_t extpol
= {.timeshare
= 1};
1162 if (!tl
) tl
= util_get_thread_threadlist_entry(th
);
1164 /* Not allowed on workqueue threads */
1165 fixedpri_rv
= ENOTSUP
;
1169 kr
= pthread_kern
->thread_policy_set_internal(th
, THREAD_EXTENDED_POLICY
, (thread_policy_t
)&extpol
, THREAD_EXTENDED_POLICY_COUNT
);
1170 if (kr
!= KERN_SUCCESS
) {
1171 fixedpri_rv
= EINVAL
;
1177 if (qos_rv
&& voucher_rv
) {
1178 /* Both failed, give that a unique error. */
1198 _bsdthread_ctl_qos_override_start(struct proc __unused
*p
, user_addr_t __unused cmd
, mach_port_name_t kport
, pthread_priority_t priority
, user_addr_t resource
, int __unused
*retval
)
1203 if ((th
= port_name_to_thread(kport
)) == THREAD_NULL
) {
1207 int override_qos
= pthread_priority_get_thread_qos(priority
);
1209 struct threadlist
*tl
= util_get_thread_threadlist_entry(th
);
1211 PTHREAD_TRACE_WQ(TRACE_wq_override_start
| DBG_FUNC_NONE
, tl
->th_workq
, thread_tid(th
), 1, priority
, 0);
1214 /* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. */
1215 pthread_kern
->proc_usynch_thread_qos_add_override_for_resource_check_owner(th
, override_qos
, TRUE
,
1216 resource
, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE
, USER_ADDR_NULL
, MACH_PORT_NULL
);
1217 thread_deallocate(th
);
1222 _bsdthread_ctl_qos_override_end(struct proc __unused
*p
, user_addr_t __unused cmd
, mach_port_name_t kport
, user_addr_t resource
, user_addr_t arg3
, int __unused
*retval
)
1231 if ((th
= port_name_to_thread(kport
)) == THREAD_NULL
) {
1235 struct uthread
*uth
= pthread_kern
->get_bsdthread_info(th
);
1237 struct threadlist
*tl
= util_get_thread_threadlist_entry(th
);
1239 PTHREAD_TRACE_WQ(TRACE_wq_override_end
| DBG_FUNC_NONE
, tl
->th_workq
, thread_tid(th
), 0, 0, 0);
1242 pthread_kern
->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth
, 0, resource
, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE
);
1244 thread_deallocate(th
);
1249 _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport
, pthread_priority_t priority
, user_addr_t resource
, user_addr_t ulock_addr
)
1254 if ((th
= port_name_to_thread(kport
)) == THREAD_NULL
) {
1258 int override_qos
= pthread_priority_get_thread_qos(priority
);
1260 struct threadlist
*tl
= util_get_thread_threadlist_entry(th
);
1262 thread_deallocate(th
);
1266 PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch
| DBG_FUNC_NONE
, tl
->th_workq
, thread_tid(th
), 1, priority
, 0);
1268 rv
= pthread_kern
->proc_usynch_thread_qos_add_override_for_resource_check_owner(th
, override_qos
, TRUE
,
1269 resource
, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE
, ulock_addr
, kport
);
1271 thread_deallocate(th
);
1275 int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused
*p
, user_addr_t __unused cmd
,
1276 mach_port_name_t kport
, pthread_priority_t priority
, user_addr_t resource
, int __unused
*retval
)
1278 return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport
, priority
, resource
, USER_ADDR_NULL
);
1282 _bsdthread_ctl_qos_override_dispatch(struct proc
*p __unused
, user_addr_t cmd __unused
, mach_port_name_t kport
, pthread_priority_t priority
, user_addr_t ulock_addr
, int __unused
*retval
)
1284 return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport
, priority
, USER_ADDR_NULL
, ulock_addr
);
1288 _bsdthread_ctl_qos_override_reset(struct proc
*p
, user_addr_t cmd
, user_addr_t arg1
, user_addr_t arg2
, user_addr_t arg3
, int *retval
)
1290 if (arg1
!= 0 || arg2
!= 0 || arg3
!= 0) {
1294 return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p
, cmd
, 1 /* reset_all */, 0, 0, retval
);
1298 _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused
*p
, user_addr_t __unused cmd
, int reset_all
, user_addr_t resource
, user_addr_t arg3
, int __unused
*retval
)
1300 if ((reset_all
&& (resource
!= 0)) || arg3
!= 0) {
1304 thread_t th
= current_thread();
1305 struct uthread
*uth
= pthread_kern
->get_bsdthread_info(th
);
1306 struct threadlist
*tl
= pthread_kern
->uthread_get_threadlist(uth
);
1312 PTHREAD_TRACE_WQ(TRACE_wq_override_reset
| DBG_FUNC_NONE
, tl
->th_workq
, 0, 0, 0, 0);
1314 resource
= reset_all
? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD
: resource
;
1315 pthread_kern
->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth
, 0, resource
, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE
);
static int
_bsdthread_ctl_max_parallelism(struct proc __unused *p, user_addr_t __unused cmd,
		int qos, unsigned long flags, int *retval)
{
	_Static_assert(QOS_PARALLELISM_COUNT_LOGICAL ==
			_PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical");
	_Static_assert(QOS_PARALLELISM_REALTIME ==
			_PTHREAD_QOS_PARALLELISM_REALTIME, "realtime");

	if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) {
		return EINVAL;
	}

	if (flags & QOS_PARALLELISM_REALTIME) {
		if (qos) {
			return EINVAL;
		}
	} else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) {
		return EINVAL;
	}

	*retval = pthread_kern->qos_max_parallelism(qos, flags);
	return 0;
}
int
_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1,
		user_addr_t arg2, user_addr_t arg3, int *retval)
{
	switch (cmd) {
	case BSDTHREAD_CTL_SET_QOS:
		return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_START:
		return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_END:
		return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
		return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
		return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
		return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
		return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_SET_SELF:
		return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
	case BSDTHREAD_CTL_QOS_MAX_PARALLELISM:
		return _bsdthread_ctl_max_parallelism(p, cmd, (int)arg1, (unsigned long)arg2, retval);
	default:
		return EINVAL;
	}
}
#pragma mark - Workqueue Implementation

#pragma mark wq_flags

static inline uint32_t
_wq_flags(struct workqueue *wq)
{
	return atomic_load_explicit(&wq->wq_flags, memory_order_relaxed);
}

static inline bool
_wq_exiting(struct workqueue *wq)
{
	return _wq_flags(wq) & WQ_EXITING;
}
static inline uint32_t
_wq_flags_or_orig(struct workqueue *wq, uint32_t v)
{
#if PTHREAD_INLINE_RMW_ATOMICS
	uint32_t state;
	do {
		state = _wq_flags(wq);
	} while (!OSCompareAndSwap(state, state | v, &wq->wq_flags));
	return state;
#else
	return atomic_fetch_or_explicit(&wq->wq_flags, v, memory_order_relaxed);
#endif
}

static inline uint32_t
_wq_flags_and_orig(struct workqueue *wq, uint32_t v)
{
#if PTHREAD_INLINE_RMW_ATOMICS
	uint32_t state;
	do {
		state = _wq_flags(wq);
	} while (!OSCompareAndSwap(state, state & v, &wq->wq_flags));
	return state;
#else
	return atomic_fetch_and_explicit(&wq->wq_flags, v, memory_order_relaxed);
#endif
}
static bool
WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq)
{
	uint32_t oldflags, newflags;
	do {
		oldflags = _wq_flags(wq);
		if (oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING)) {
			return false;
		}
		newflags = oldflags | WQ_ATIMER_DELAYED_RUNNING;
	} while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
	return true;
}

static bool
WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq)
{
	uint32_t oldflags, newflags;
	do {
		oldflags = _wq_flags(wq);
		if (oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING)) {
			return false;
		}
		newflags = oldflags | WQ_ATIMER_IMMEDIATE_RUNNING;
	} while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
	return true;
}
#pragma mark thread requests pacing

static inline uint32_t
_wq_pacing_shift_for_pri(int pri)
{
	return _wq_bucket_to_thread_qos(pri) - 1;
}

static inline int
_wq_highest_paced_priority(struct workqueue *wq)
{
	uint8_t paced = wq->wq_paced;
	int msb = paced ? 32 - __builtin_clz(paced) : 0; // fls(paced) == bit + 1
	return WORKQUEUE_EVENT_MANAGER_BUCKET - msb;
}

static inline uint8_t
_wq_pacing_bit_for_pri(int pri)
{
	return 1u << _wq_pacing_shift_for_pri(pri);
}

static inline bool
_wq_should_pace_priority(struct workqueue *wq, int pri)
{
	return wq->wq_paced >= _wq_pacing_bit_for_pri(pri);
}
static inline void
_wq_pacing_start(struct workqueue *wq, struct threadlist *tl)
{
	uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
	assert((tl->th_flags & TH_LIST_PACING) == 0);
	assert((wq->wq_paced & bit) == 0);
	wq->wq_paced |= bit;
	tl->th_flags |= TH_LIST_PACING;
}

static inline bool
_wq_pacing_end(struct workqueue *wq, struct threadlist *tl)
{
	if (tl->th_flags & TH_LIST_PACING) {
		uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
		assert((wq->wq_paced & bit) != 0);
		wq->wq_paced ^= bit;
		tl->th_flags &= ~TH_LIST_PACING;
		return wq->wq_paced < bit; // !_wq_should_pace_priority
	}
	return false;
}
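
/*
 * Illustrative sketch (not part of the original source): wq_paced packs one bit
 * per QoS bucket, with higher-QoS buckets in higher bit positions, so the
 * "should pace" test is just an unsigned comparison against the bucket's bit.
 * For example, with only the manager bucket (pri 6 -> thread QoS 7) paced,
 * wq_paced == 0x40 and every lower bucket's bit compares below it. The helper
 * name is hypothetical.
 */
static inline bool
_example_wq_paced_at_or_above(uint8_t wq_paced, int pri)
{
	uint8_t bit = (uint8_t)(1u << (_wq_bucket_to_thread_qos(pri) - 1));
	return wq_paced >= bit;	// same comparison as _wq_should_pace_priority()
}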
1495 #pragma mark thread requests
1498 _threadreq_init_alloced(struct threadreq
*req
, int priority
, int flags
)
1500 assert((flags
& TR_FLAG_ONSTACK
) == 0);
1501 req
->tr_state
= TR_STATE_NEW
;
1502 req
->tr_priority
= priority
;
1503 req
->tr_flags
= flags
;
1507 _threadreq_init_stack(struct threadreq
*req
, int priority
, int flags
)
1509 req
->tr_state
= TR_STATE_NEW
;
1510 req
->tr_priority
= priority
;
1511 req
->tr_flags
= flags
| TR_FLAG_ONSTACK
;
1515 _threadreq_copy_prepare(struct workqueue
*wq
)
1518 if (wq
->wq_cached_threadreq
) {
1522 workqueue_unlock(wq
);
1523 struct threadreq
*req
= zalloc(pthread_zone_threadreq
);
1524 workqueue_lock_spin(wq
);
1526 if (wq
->wq_cached_threadreq
) {
1528 * We lost the race and someone left behind an extra threadreq for us
1529 * to use. Throw away our request and retry.
1531 workqueue_unlock(wq
);
1532 zfree(pthread_zone_threadreq
, req
);
1533 workqueue_lock_spin(wq
);
1536 wq
->wq_cached_threadreq
= req
;
1539 assert(wq
->wq_cached_threadreq
);
1543 _threadreq_copy_prepare_noblock(struct workqueue
*wq
)
1545 if (wq
->wq_cached_threadreq
) {
1549 wq
->wq_cached_threadreq
= zalloc_noblock(pthread_zone_threadreq
);
1551 return wq
->wq_cached_threadreq
!= NULL
;
1554 static inline struct threadreq_head
*
1555 _threadreq_list_for_req(struct workqueue
*wq
, const struct threadreq
*req
)
1557 if (req
->tr_flags
& TR_FLAG_OVERCOMMIT
) {
1558 return &wq
->wq_overcommit_reqlist
[req
->tr_priority
];
1560 return &wq
->wq_reqlist
[req
->tr_priority
];
1565 _threadreq_enqueue(struct workqueue
*wq
, struct threadreq
*req
)
1567 assert(req
&& req
->tr_state
== TR_STATE_NEW
);
1568 if (req
->tr_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
) {
1569 assert(wq
->wq_event_manager_threadreq
.tr_state
!= TR_STATE_WAITING
);
1570 memcpy(&wq
->wq_event_manager_threadreq
, req
, sizeof(struct threadreq
));
1571 req
= &wq
->wq_event_manager_threadreq
;
1572 req
->tr_flags
&= ~(TR_FLAG_ONSTACK
| TR_FLAG_NO_PACING
);
1574 if (req
->tr_flags
& TR_FLAG_ONSTACK
) {
1575 assert(wq
->wq_cached_threadreq
);
1576 struct threadreq
*newreq
= wq
->wq_cached_threadreq
;
1577 wq
->wq_cached_threadreq
= NULL
;
1579 memcpy(newreq
, req
, sizeof(struct threadreq
));
1580 newreq
->tr_flags
&= ~(TR_FLAG_ONSTACK
| TR_FLAG_NO_PACING
);
1581 req
->tr_state
= TR_STATE_DEAD
;
1584 TAILQ_INSERT_TAIL(_threadreq_list_for_req(wq
, req
), req
, tr_entry
);
1586 req
->tr_state
= TR_STATE_WAITING
;
1591 _threadreq_dequeue(struct workqueue
*wq
, struct threadreq
*req
)
1593 if (req
->tr_priority
!= WORKQUEUE_EVENT_MANAGER_BUCKET
) {
1594 struct threadreq_head
*req_list
= _threadreq_list_for_req(wq
, req
);
1596 struct threadreq
*cursor
= NULL
;
1597 TAILQ_FOREACH(cursor
, req_list
, tr_entry
) {
1598 if (cursor
== req
) break;
1600 assert(cursor
== req
);
1602 TAILQ_REMOVE(req_list
, req
, tr_entry
);
1608 * Mark a thread request as complete. At this point, it is treated as owned by
1609 * the submitting subsystem and you should assume it could be freed.
1611 * Called with the workqueue lock held.
1614 _threadreq_complete_and_unlock(proc_t p
, struct workqueue
*wq
,
1615 struct threadreq
*req
, struct threadlist
*tl
)
1617 struct threadreq
*req_tofree
= NULL
;
1618 bool sync
= (req
->tr_state
== TR_STATE_NEW
);
1619 bool workloop
= req
->tr_flags
& TR_FLAG_WORKLOOP
;
1620 bool onstack
= req
->tr_flags
& TR_FLAG_ONSTACK
;
1621 bool kevent
= req
->tr_flags
& TR_FLAG_KEVENT
;
1622 bool unbinding
= tl
->th_flags
& TH_LIST_UNBINDING
;
1624 bool waking_parked_thread
= (tl
->th_flags
& TH_LIST_BUSY
);
1627 req
->tr_state
= TR_STATE_COMPLETE
;
1629 if (!workloop
&& !onstack
&& req
!= &wq
->wq_event_manager_threadreq
) {
1630 if (wq
->wq_cached_threadreq
) {
1633 wq
->wq_cached_threadreq
= req
;
1637 if (tl
->th_flags
& TH_LIST_UNBINDING
) {
1638 tl
->th_flags
&= ~TH_LIST_UNBINDING
;
1639 assert((tl
->th_flags
& TH_LIST_KEVENT_BOUND
));
1640 } else if (workloop
|| kevent
) {
1641 assert((tl
->th_flags
& TH_LIST_KEVENT_BOUND
) == 0);
1642 tl
->th_flags
|= TH_LIST_KEVENT_BOUND
;
1646 workqueue_unlock(wq
);
1647 ret
= pthread_kern
->workloop_fulfill_threadreq(wq
->wq_proc
, (void*)req
,
1648 tl
->th_thread
, sync
? WORKLOOP_FULFILL_THREADREQ_SYNC
: 0);
1651 } else if (kevent
) {
1652 unsigned int kevent_flags
= KEVENT_FLAG_WORKQ
;
1654 kevent_flags
|= KEVENT_FLAG_SYNCHRONOUS_BIND
;
1656 if (tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
) {
1657 kevent_flags
|= KEVENT_FLAG_WORKQ_MANAGER
;
1659 workqueue_unlock(wq
);
1660 ret
= kevent_qos_internal_bind(wq
->wq_proc
,
1661 class_index_get_thread_qos(tl
->th_priority
), tl
->th_thread
,
1664 workqueue_lock_spin(wq
);
1665 tl
->th_flags
&= ~TH_LIST_KEVENT_BOUND
;
1675 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq
| DBG_FUNC_END
, wq
, 0, 0, 0, 0);
1676 PTHREAD_TRACE_WQ_REQ(TRACE_wq_runitem
| DBG_FUNC_START
, wq
, req
, tl
->th_priority
,
1677 thread_tid(current_thread()), thread_tid(tl
->th_thread
));
1679 if (waking_parked_thread
) {
1681 workqueue_lock_spin(wq
);
1683 tl
->th_flags
&= ~(TH_LIST_BUSY
);
1684 if ((tl
->th_flags
& TH_LIST_REMOVING_VOUCHER
) == 0) {
1686 * If the thread is in the process of removing its voucher, then it
1687 * isn't actually in the wait event yet and we don't need to wake
1688 * it up. Save the trouble (and potential lock-ordering issues
1691 thread_wakeup_thread(tl
, tl
->th_thread
);
1693 workqueue_unlock(wq
);
1695 if (req_tofree
) zfree(pthread_zone_threadreq
, req_tofree
);
1696 return WQ_RUN_TR_THREAD_STARTED
;
1699 assert ((tl
->th_flags
& TH_LIST_PACING
) == 0);
1701 workqueue_unlock(wq
);
1703 if (req_tofree
) zfree(pthread_zone_threadreq
, req_tofree
);
1705 return WQ_RUN_TR_THREAD_STARTED
;
1707 _setup_wqthread(p
, tl
->th_thread
, wq
, tl
, WQ_SETUP_CLEAR_VOUCHER
);
1708 pthread_kern
->unix_syscall_return(EJUSTRETURN
);
1709 __builtin_unreachable();
1713 * Mark a thread request as cancelled. Has similar ownership semantics to the
1714 * complete call above.
1717 _threadreq_cancel(struct workqueue
*wq
, struct threadreq
*req
)
1719 assert(req
->tr_state
== TR_STATE_WAITING
);
1720 req
->tr_state
= TR_STATE_DEAD
;
1722 assert((req
->tr_flags
& TR_FLAG_ONSTACK
) == 0);
1723 if (req
->tr_flags
& TR_FLAG_WORKLOOP
) {
1724 __assert_only
int ret
;
1725 ret
= pthread_kern
->workloop_fulfill_threadreq(wq
->wq_proc
, (void*)req
,
1726 THREAD_NULL
, WORKLOOP_FULFILL_THREADREQ_CANCEL
);
1727 assert(ret
== 0 || ret
== ECANCELED
);
1728 } else if (req
!= &wq
->wq_event_manager_threadreq
) {
1729 zfree(pthread_zone_threadreq
, req
);
#pragma mark workqueue lock

static boolean_t
workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq)
{
	return kdp_lck_spin_is_acquired(&wq->wq_lock);
}

static void
workqueue_lock_spin(struct workqueue *wq)
{
	assert(ml_get_interrupts_enabled() == TRUE);
	lck_spin_lock(&wq->wq_lock);
}

static bool
workqueue_lock_try(struct workqueue *wq)
{
	return lck_spin_try_lock(&wq->wq_lock);
}

static void
workqueue_unlock(struct workqueue *wq)
{
	lck_spin_unlock(&wq->wq_lock);
}
#pragma mark workqueue add timer

/**
 * Sets up the timer which will call out to workqueue_add_timer
 */
static void
workqueue_interval_timer_start(struct workqueue *wq)
{
	uint64_t deadline;

	/* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the
	   ATIMER_RUNNING flag is not present.  The net effect here is that if a
	   sequence of threads is required, we'll double the time before we give out
	   the next one. */
	if (wq->wq_timer_interval == 0) {
		wq->wq_timer_interval = wq_stalled_window_usecs;
	} else {
		wq->wq_timer_interval = wq->wq_timer_interval * 2;

		if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
			wq->wq_timer_interval = wq_max_timer_interval_usecs;
		}
	}
	clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);

	PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount,
			_wq_flags(wq), wq->wq_timer_interval, 0);

	thread_call_t call = wq->wq_atimer_delayed_call;
	if (thread_call_enter1_delayed(call, call, deadline)) {
		panic("delayed_call was already enqueued");
	}
}
/**
 * Immediately trigger the workqueue_add_timer
 */
static void
workqueue_interval_timer_trigger(struct workqueue *wq)
{
	PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount,
			_wq_flags(wq), 0, 0);

	thread_call_t call = wq->wq_atimer_immediate_call;
	if (thread_call_enter1(call, call)) {
		panic("immediate_call was already enqueued");
	}
}
/**
 * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts
 */
static boolean_t
wq_thread_is_busy(uint64_t cur_ts, _Atomic uint64_t *lastblocked_tsp)
{
	clock_sec_t	secs;
	clock_usec_t	usecs;
	uint64_t lastblocked_ts;
	uint64_t elapsed;

	lastblocked_ts = atomic_load_explicit(lastblocked_tsp, memory_order_relaxed);
	if (lastblocked_ts >= cur_ts) {
		/*
		 * because the update of the timestamp when a thread blocks isn't
		 * serialized against us looking at it (i.e. we don't hold the workq lock)
		 * it's possible to have a timestamp that matches the current time or
		 * that even looks to be in the future relative to when we grabbed the current
		 * time... just treat this as a busy thread since it must have just blocked.
		 */
		return (TRUE);
	}
	elapsed = cur_ts - lastblocked_ts;

	pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);

	return (secs == 0 && usecs < wq_stalled_window_usecs);
}
/*
 * handler function for the timer
 */
static void
workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self)
{
	proc_t p = wq->wq_proc;

	workqueue_lock_spin(wq);

	PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq,
			_wq_flags(wq), wq->wq_nthreads, wq->wq_thidlecount, 0);

	/*
	 * There are two tricky issues here.
	 *
	 * First issue: we start the thread_call's that invoke this routine without
	 * the workqueue lock held.  The scheduler callback needs to trigger
	 * reevaluation of the number of running threads but shouldn't take that
	 * lock, so we can't use it to synchronize state around the thread_call.
	 * As a result, it might re-enter the thread_call while this routine is
	 * already running.  This could cause it to fire a second time and we'll
	 * have two add_timers running at once.  Obviously, we don't want that to
	 * keep stacking, so we need to keep it at two timers.
	 *
	 * Solution: use wq_flags (accessed via atomic CAS) to synchronize the
	 * enqueue of the thread_call itself.  When a thread needs to trigger the
	 * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets
	 * the flag then does a thread_call_enter.  We'll then remove that flag
	 * only once we've got the lock and it's safe for the thread_call to be
	 * entered again.
	 *
	 * Second issue: we need to make sure that the two timers don't execute this
	 * routine concurrently.  We can't use the workqueue lock for this because
	 * we'll need to drop it during our execution.
	 *
	 * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that
	 * we are currently executing the routine and the next thread should wait.
	 *
	 * After all that, we arrive at the following four possible states:
	 * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY    no pending timer, no active timer
	 * !WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY    no pending timer,  1 active timer
	 *  WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY     1 pending timer, no active timer
	 *  WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY     1 pending timer,  1 active timer
	 *
	 * Further complication: sometimes we need to trigger this function to run
	 * without delay.  Because we aren't under a lock between setting
	 * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply
	 * re-enter the thread call: if thread_call_enter() returned false, we
	 * wouldn't be able to distinguish the case where the thread_call had
	 * already fired from the case where it hadn't been entered yet from the
	 * other thread.  So, we use a separate thread_call for immediate
	 * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING.
	 */
	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
		wq->wq_lflags |= WQL_ATIMER_WAITING;

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(wq);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(wq);
	}
	/*
	 * Prevent _workqueue_mark_exiting() from going away
	 */
	wq->wq_lflags |= WQL_ATIMER_BUSY;

	/*
	 * Decide which timer we are and remove the RUNNING flag.
	 */
	if (thread_call_self == wq->wq_atimer_delayed_call) {
		uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
		if ((wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) {
			panic("workqueue_add_timer(delayed) w/o WQ_ATIMER_DELAYED_RUNNING");
		}
	} else if (thread_call_self == wq->wq_atimer_immediate_call) {
		uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
		if ((wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) {
			panic("workqueue_add_timer(immediate) w/o WQ_ATIMER_IMMEDIATE_RUNNING");
		}
	} else {
		panic("workqueue_add_timer can't figure out which timer it is");
	}

	int ret = WQ_RUN_TR_THREAD_STARTED;
	while (ret == WQ_RUN_TR_THREAD_STARTED && wq->wq_reqcount) {
		ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);

		workqueue_lock_spin(wq);
	}
	_threadreq_copy_prepare(wq);

	/*
	 * If we called WQ_TIMER_NEEDED above, then this flag will be set if that
	 * call marked the timer running.  If so, we let the timer interval grow.
	 * Otherwise, we reset it back to 0.
	 */
	uint32_t wq_flags = _wq_flags(wq);
	if (!(wq_flags & WQ_ATIMER_DELAYED_RUNNING)) {
		wq->wq_timer_interval = 0;
	}

	wq->wq_lflags &= ~WQL_ATIMER_BUSY;

	if ((wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
		/*
		 * wakeup the thread hung up in _workqueue_mark_exiting or
		 * workqueue_add_timer waiting for this timer to finish getting out of
		 * the way
		 */
		wq->wq_lflags &= ~WQL_ATIMER_WAITING;
		wakeup(wq);
	}

	PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_unlock(wq);
}
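/*
 * Illustrative sketch (not part of the kernel build): the arming/servicing
 * protocol described in the comment above, modeled in user-space C11.  An
 * atomic flags word plays the role of wq_flags, a mutex-protected boolean
 * plays the role of WQL_ATIMER_BUSY, and enqueue_timer() is a hypothetical
 * stand-in for thread_call_enter().  Only the flag lifecycle is shown.
 */
#if 0
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define F_EXITING          0x1u
#define F_DELAYED_RUNNING  0x2u

static void enqueue_timer(void) { /* stub: would arm the delayed thread call */ }

static _Atomic uint32_t flags;
static pthread_mutex_t  wql = PTHREAD_MUTEX_INITIALIZER;  /* stands in for the workq lock */
static pthread_cond_t   wqc = PTHREAD_COND_INITIALIZER;
static bool             atimer_busy;                      /* WQL_ATIMER_BUSY */

/* Arming side: set RUNNING only if it is not already set and we aren't exiting. */
static void arm_delayed_timer(void)
{
	uint32_t old = atomic_load_explicit(&flags, memory_order_relaxed);
	do {
		if (old & (F_EXITING | F_DELAYED_RUNNING)) return;
	} while (!atomic_compare_exchange_weak(&flags, &old, old | F_DELAYED_RUNNING));
	enqueue_timer();
}

/* Handler side: at most one instance runs the body at a time; RUNNING is
 * dropped only once it is safe for the call to be enqueued again. */
static void timer_handler(void)
{
	pthread_mutex_lock(&wql);
	while (atimer_busy)
		pthread_cond_wait(&wqc, &wql);
	atimer_busy = true;
	atomic_fetch_and_explicit(&flags, ~F_DELAYED_RUNNING, memory_order_relaxed);

	/* ... service thread requests, possibly dropping and retaking wql ... */

	atimer_busy = false;
	pthread_cond_broadcast(&wqc);
	pthread_mutex_unlock(&wql);
}
#endif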
#pragma mark thread state tracking

// called by spinlock code when trying to yield to lock owner
void
_workqueue_thread_yielded(void)
{
}

static void
workqueue_callback(int type, thread_t thread)
{
	struct uthread *uth = pthread_kern->get_bsdthread_info(thread);
	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
	struct workqueue *wq = tl->th_workq;
	uint32_t old_count, req_qos, qos = tl->th_priority;
	wq_thactive_t old_thactive;

	switch (type) {
	case SCHED_CALL_BLOCK: {
		bool start_timer = false;

		old_thactive = _wq_thactive_dec(wq, tl->th_priority);
		req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive);
		old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
				qos, NULL, NULL);

		if (old_count == wq_max_concurrency[tl->th_priority]) {
			/*
			 * The number of active threads at this priority has fallen below
			 * the maximum number of concurrent threads that are allowed to run
			 *
			 * if we collide with another thread trying to update the
			 * last_blocked (really unlikely since another thread would have to
			 * get scheduled and then block after we start down this path), it's
			 * not a problem.  Either timestamp is adequate, so no need to retry
			 */
			atomic_store_explicit(&wq->wq_lastblocked_ts[qos],
					mach_absolute_time(), memory_order_relaxed);
		}

		if (req_qos == WORKQUEUE_EVENT_MANAGER_BUCKET || qos > req_qos) {
			/*
			 * The blocking thread is at a lower QoS than the highest currently
			 * pending constrained request, nothing has to be redriven
			 */
		} else {
			uint32_t max_busycount, old_req_count;
			old_req_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
					req_qos, NULL, &max_busycount);
			/*
			 * If it is possible that may_start_constrained_thread had refused
			 * admission due to being over the max concurrency, we may need to
			 * spin up a new thread.
			 *
			 * We take into account the maximum number of busy threads
			 * that can affect may_start_constrained_thread as looking at the
			 * actual number may_start_constrained_thread will see is racy.
			 *
			 * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is
			 * between NCPU (4) and NCPU - 2 (2) we need to redrive.
			 */
			if (wq_max_concurrency[req_qos] <= old_req_count + max_busycount &&
					old_req_count <= wq_max_concurrency[req_qos]) {
				if (WQ_TIMER_DELAYED_NEEDED(wq)) {
					start_timer = true;
					workqueue_interval_timer_start(wq);
				}
			}
		}

		PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq,
				old_count - 1, qos | (req_qos << 8),
				wq->wq_reqcount << 1 | start_timer, 0);
		break;
	}
	case SCHED_CALL_UNBLOCK: {
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 * is also held
		 */
		old_thactive = _wq_thactive_inc(wq, qos);
		if (pthread_debug_tracing) {
			req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive);
			old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
					qos, NULL, NULL);
			PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq,
					old_count + 1, qos | (req_qos << 8),
					wq->wq_threads_scheduled, 0);
		}
		break;
	}
	}
}

sched_call_t
_workqueue_get_sched_callback(void)
{
	return workqueue_callback;
}
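/*
 * Illustrative sketch (not part of the kernel build): the redrive-window check
 * from SCHED_CALL_BLOCK above, as standalone arithmetic.  With a hypothetical
 * max concurrency of 4 (NCPU) and max_busycount of 2, old request counts of
 * 2, 3 and 4 fall inside the window and would rearm the timer, while 1 and 5
 * would not -- matching the "between NCPU and NCPU - 2" note in the comment.
 */
#if 0
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
needs_redrive(uint32_t max_concurrency, uint32_t old_req_count, uint32_t max_busycount)
{
	return max_concurrency <= old_req_count + max_busycount &&
			old_req_count <= max_concurrency;
}

static void
redrive_example(void)
{
	const uint32_t ncpu = 4, busy = 2;
	assert(!needs_redrive(ncpu, 1, busy)); /* clearly under the limit */
	assert( needs_redrive(ncpu, 2, busy)); /* NCPU - 2 */
	assert( needs_redrive(ncpu, 3, busy));
	assert( needs_redrive(ncpu, 4, busy)); /* NCPU */
	assert(!needs_redrive(ncpu, 5, busy)); /* already over; admission failed for other reasons */
}
#endif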
#pragma mark thread addition/removal

static mach_vm_size_t
_workqueue_allocsize(struct workqueue *wq)
{
	proc_t p = wq->wq_proc;
	mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
	mach_vm_size_t pthread_size =
		vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}
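/*
 * Illustrative sketch (not part of the kernel build): the per-thread
 * allocation that _workqueue_allocsize() sizes, using hypothetical page,
 * stack and pthread_t sizes.  One guard page sits at the bottom, the stack in
 * the middle, and the page-rounded pthread_t area at the top.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t
round_page(uint64_t x, uint64_t pagesize)
{
	return (x + pagesize - 1) & ~(pagesize - 1);
}

static void
allocsize_example(void)
{
	const uint64_t pagesize  = 16384;        /* hypothetical vm_map_page_size() */
	const uint64_t stacksize = 512 * 1024;   /* hypothetical PTH_DEFAULT_STACKSIZE */
	const uint64_t pthsize   = 4096 + 8192;  /* hypothetical proc_get_pthsize() + PTHREAD_T_OFFSET */

	uint64_t allocsize = pagesize /* guard */ + stacksize + round_page(pthsize, pagesize);
	assert(allocsize == 16384 + 524288 + 16384);
}
#endif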
/*
 * pop goes the thread
 *
 * If fromexit is set, the call is from workqueue_exit(),
 * so some cleanups are to be avoided.
 */
static void
workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use)
{
	struct uthread *uth;
	struct workqueue *wq = tl->th_workq;

	if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
		TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
	} else {
		TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
	}

	if (fromexit == 0) {
		assert(wq->wq_nthreads && wq->wq_thidlecount);
		wq->wq_nthreads--;
		wq->wq_thidlecount--;
	}

	/*
	 * Clear the threadlist pointer in uthread so
	 * blocked thread on wakeup for termination will
	 * not access the thread list as it is going to be
	 * freed.
	 */
	pthread_kern->thread_sched_call(tl->th_thread, NULL);

	uth = pthread_kern->get_bsdthread_info(tl->th_thread);
	if (uth != (struct uthread *)0) {
		pthread_kern->uthread_set_threadlist(uth, NULL);
	}
	if (fromexit == 0) {
		/* during exit the lock is not held */
		workqueue_unlock(wq);
	}

	if ((tl->th_flags & TH_LIST_NEW) || first_use) {
		/*
		 * thread was created, but never used...
		 * need to clean up the stack and port ourselves
		 * since we're not going to spin up through the
		 * normal exit path triggered from Libc
		 */
		if (fromexit == 0) {
			/* vm map is already deallocated when this is called from exit */
			(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq));
		}
		(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
	}
	/*
	 * drop our ref on the thread
	 */
	thread_deallocate(tl->th_thread);

	zfree(pthread_zone_threadlist, tl);
}
/*
 * Try to add a new workqueue thread.
 *
 * - called with workq lock held
 * - dropped and retaken around thread creation
 * - return with workq lock held
 */
static bool
workqueue_addnewthread(proc_t p, struct workqueue *wq)
{
	kern_return_t kret;
	thread_t th;

	wq->wq_nthreads++;

	workqueue_unlock(wq);

	struct threadlist *tl = zalloc(pthread_zone_threadlist);
	bzero(tl, sizeof(struct threadlist));

	kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th);
	if (kret != KERN_SUCCESS) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0);
		goto fail_free;
	}

	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);

	mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
	mach_vm_size_t pthread_size =
		vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
	mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			th_allocsize, page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
				&stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}

	if (kret != KERN_SUCCESS) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0);
		goto fail_terminate;
	}

	/*
	 * The guard page is at the lowest address
	 * The stack base is the highest address
	 */
	kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0);
		goto fail_vm_deallocate;
	}

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE);
	pthread_kern->thread_static_param(th, TRUE);

	/*
	 * convert_thread_to_port() consumes a reference
	 */
	thread_reference(th);
	void *sright = (void *)pthread_kern->convert_thread_to_port(th);
	tl->th_thport = pthread_kern->ipc_port_copyout_send(sright,
			pthread_kern->task_get_ipcspace(wq->wq_task));

	tl->th_flags = TH_LIST_INITED | TH_LIST_NEW;
	tl->th_thread = th;
	tl->th_workq = wq;
	tl->th_stackaddr = stackaddr;
	tl->th_priority = WORKQUEUE_NUM_BUCKETS;

	struct uthread *uth;
	uth = pthread_kern->get_bsdthread_info(tl->th_thread);

	workqueue_lock_spin(wq);

	void *current_tl = pthread_kern->uthread_get_threadlist(uth);
	if (current_tl == NULL) {
		pthread_kern->uthread_set_threadlist(uth, tl);
		TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
		wq->wq_thidlecount++;
	} else if (current_tl == WQ_THREADLIST_EXITING_POISON) {
		/*
		 * Failed thread creation race: The thread already woke up and has exited.
		 */
		PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 3, 0, 0);
		goto fail_unlock;
	} else {
		panic("Unexpected initial threadlist value");
	}

	PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0);

	return (true);

fail_unlock:
	workqueue_unlock(wq);
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task),
			tl->th_thport);

fail_vm_deallocate:
	(void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize);

fail_terminate:
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	thread_deallocate(th);

fail_free:
	zfree(pthread_zone_threadlist, tl);

	workqueue_lock_spin(wq);
	wq->wq_nthreads--;

	return (false);
}
/*
 * Setup per-process state for the workqueue.
 */
int
_workq_open(struct proc *p, __unused int32_t *retval)
{
	struct workqueue *wq;
	char *ptr;
	uint32_t num_cpus;
	int error = 0;

	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	num_cpus = pthread_kern->ml_get_max_cpus();

	if (wq_init_constrained_limit) {
		uint32_t limit;
		/*
		 * set up the limit for the constrained pool
		 * this is a virtual pool in that we don't
		 * maintain it on a separate idle and run list
		 */
		limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;

		if (limit > wq_max_constrained_threads)
			wq_max_constrained_threads = limit;

		wq_init_constrained_limit = 0;

		if (wq_max_threads > WQ_THACTIVE_BUCKET_HALF) {
			wq_max_threads = WQ_THACTIVE_BUCKET_HALF;
		}
		if (wq_max_threads > pthread_kern->config_thread_max - 20) {
			wq_max_threads = pthread_kern->config_thread_max - 20;
		}
	}

	if (pthread_kern->proc_get_wqptr(p) == NULL) {
		if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) {
			assert(pthread_kern->proc_get_wqptr(p) != NULL);
			goto out;
		}

		ptr = (char *)zalloc(pthread_zone_workqueue);
		bzero(ptr, sizeof(struct workqueue));

		wq = (struct workqueue *)ptr;
		wq->wq_proc = p;
		wq->wq_task = current_task();
		wq->wq_map  = pthread_kern->current_map();

		// Start the event manager at the priority hinted at by the policy engine
		int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task());
		wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;

		TAILQ_INIT(&wq->wq_thrunlist);
		TAILQ_INIT(&wq->wq_thidlelist);
		for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
			TAILQ_INIT(&wq->wq_overcommit_reqlist[i]);
			TAILQ_INIT(&wq->wq_reqlist[i]);
		}

		wq->wq_atimer_delayed_call =
				thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
						(thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
		wq->wq_atimer_immediate_call =
				thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
						(thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);

		lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr);

		wq->wq_cached_threadreq = zalloc(pthread_zone_threadreq);
		*(wq_thactive_t *)&wq->wq_thactive =
				(wq_thactive_t)WQ_THACTIVE_NO_PENDING_REQUEST <<
				WQ_THACTIVE_QOS_SHIFT;

		pthread_kern->proc_set_wqptr(p, wq);
	}
out:
	return(error);
}
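/*
 * Illustrative sketch (not part of the kernel build): the one-time limit setup
 * performed above, with hypothetical tunable values.  The constrained pool is
 * sized as a multiple of the CPU count, and the hard thread cap is clamped
 * both by what the thactive encoding can count and by the system-wide thread
 * limit.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static void
limits_example(void)
{
	const uint32_t num_cpus = 8;
	const uint32_t constrained_factor = 8;   /* hypothetical WORKQUEUE_CONSTRAINED_FACTOR */
	const uint32_t bucket_half = 127;        /* hypothetical WQ_THACTIVE_BUCKET_HALF */
	const uint32_t config_thread_max = 500;  /* hypothetical kernel thread limit */

	uint32_t max_constrained = 64;           /* existing default */
	uint32_t limit = num_cpus * constrained_factor;
	if (limit > max_constrained)
		max_constrained = limit;             /* 64 stays 64 with these numbers */

	uint32_t max_threads = 512;              /* existing default */
	if (max_threads > bucket_half)
		max_threads = bucket_half;           /* clamp to what thactive can count */
	if (max_threads > config_thread_max - 20)
		max_threads = config_thread_max - 20;

	assert(max_constrained == 64 && max_threads == 127);
}
#endif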
/*
 * Routine:	workqueue_mark_exiting
 *
 * Function:	Mark the work queue such that new threads will not be added to the
 *		work queue after we return.
 *
 * Conditions:	Called against the current process.
 */
void
_workqueue_mark_exiting(struct proc *p)
{
	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
	if (!wq) return;

	PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);

	workqueue_lock_spin(wq);

	/*
	 * We arm the add timer without holding the workqueue lock so we need
	 * to synchronize with any running or soon to be running timers.
	 *
	 * Threads that intend to arm the timer atomically OR
	 * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if
	 * WQ_EXITING is not present.  So, once we have set WQ_EXITING, we can
	 * be sure that no new RUNNING flags will be set, but still need to
	 * wait for the already running timers to complete.
	 *
	 * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so
	 * the check for and sleep until clear is protected.
	 */
	uint64_t wq_flags = _wq_flags_or_orig(wq, WQ_EXITING);

	if (wq_flags & WQ_ATIMER_DELAYED_RUNNING) {
		if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) {
			wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
		}
	}
	if (wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) {
		if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) {
			wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
		}
	}
	while ((_wq_flags(wq) & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING)) ||
			(wq->wq_lflags & WQL_ATIMER_BUSY)) {
		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(wq);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(wq);
	}

	/*
	 * Save off pending requests, will complete/free them below after unlocking
	 */
	TAILQ_HEAD(, threadreq) local_list = TAILQ_HEAD_INITIALIZER(local_list);

	for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
		TAILQ_CONCAT(&local_list, &wq->wq_overcommit_reqlist[i], tr_entry);
		TAILQ_CONCAT(&local_list, &wq->wq_reqlist[i], tr_entry);
	}

	/*
	 * XXX: Can't deferred cancel the event manager request, so just smash it.
	 */
	assert((wq->wq_event_manager_threadreq.tr_flags & TR_FLAG_WORKLOOP) == 0);
	wq->wq_event_manager_threadreq.tr_state = TR_STATE_DEAD;

	workqueue_unlock(wq);

	struct threadreq *tr, *tr_temp;
	TAILQ_FOREACH_SAFE(tr, &local_list, tr_entry, tr_temp) {
		_threadreq_cancel(wq, tr);
	}
	PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
}
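/*
 * Illustrative sketch (not part of the kernel build): the teardown ordering
 * used above, modeled in user-space C11.  cancel_timer() is a hypothetical
 * stand-in for thread_call_cancel(); it returns true when a still-pending call
 * was pulled back before firing, in which case the RUNNING bit can be cleared
 * immediately instead of waiting for the handler to clear it.
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define F_EXITING          0x1u
#define F_DELAYED_RUNNING  0x2u

static _Atomic uint32_t flags;

/* stub: pretend the pending call was successfully pulled back */
static bool cancel_timer(void) { return true; }

/* stub: stands in for an assert_wait()/thread_block() pair; here it simply
 * simulates the handler finishing and dropping its RUNNING bit */
static void wait_for_handler(void)
{
	atomic_fetch_and_explicit(&flags, ~F_DELAYED_RUNNING, memory_order_relaxed);
}

static void
mark_exiting(void)
{
	/* After this, the arming side refuses to set RUNNING again. */
	uint32_t old = atomic_fetch_or_explicit(&flags, F_EXITING, memory_order_relaxed);

	if ((old & F_DELAYED_RUNNING) && cancel_timer()) {
		atomic_fetch_and_explicit(&flags, ~F_DELAYED_RUNNING, memory_order_relaxed);
	}
	/* Any call that already fired still owns RUNNING; wait it out. */
	while (atomic_load_explicit(&flags, memory_order_relaxed) & F_DELAYED_RUNNING) {
		wait_for_handler();
	}
}
#endif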
/*
 * Routine:	workqueue_exit
 *
 * Function:	clean up the work queue structure(s) now that there are no threads
 *		left running inside the work queue (except possibly current_thread).
 *
 * Conditions:	Called by the last thread in the process.
 *		Called against current process.
 */
void
_workqueue_exit(struct proc *p)
{
	struct workqueue *wq;
	struct threadlist *tl, *tlist;
	struct uthread *uth;

	wq = pthread_kern->proc_get_wqptr(p);
	if (wq != NULL) {

		PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);

		pthread_kern->proc_set_wqptr(p, NULL);

		/*
		 * Clean up workqueue data structures for threads that exited and
		 * didn't get a chance to clean up after themselves.
		 */
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
			assert((tl->th_flags & TH_LIST_RUNNING) != 0);

			pthread_kern->thread_sched_call(tl->th_thread, NULL);

			uth = pthread_kern->get_bsdthread_info(tl->th_thread);
			if (uth != (struct uthread *)0) {
				pthread_kern->uthread_set_threadlist(uth, NULL);
			}
			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

			/*
			 * drop our last ref on the thread
			 */
			thread_deallocate(tl->th_thread);

			zfree(pthread_zone_threadlist, tl);
		}
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
			assert((tl->th_flags & TH_LIST_RUNNING) == 0);
			assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
			workqueue_removethread(tl, true, false);
		}
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) {
			assert((tl->th_flags & TH_LIST_RUNNING) == 0);
			assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
			workqueue_removethread(tl, true, false);
		}
		if (wq->wq_cached_threadreq) {
			zfree(pthread_zone_threadreq, wq->wq_cached_threadreq);
		}
		thread_call_free(wq->wq_atimer_delayed_call);
		thread_call_free(wq->wq_atimer_immediate_call);
		lck_spin_destroy(&wq->wq_lock, pthread_lck_grp);

		for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
			assert(TAILQ_EMPTY(&wq->wq_overcommit_reqlist[i]));
			assert(TAILQ_EMPTY(&wq->wq_reqlist[i]));
		}

		zfree(pthread_zone_workqueue, wq);

		PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
	}
}
#pragma mark workqueue thread manipulation

/*
 * Entry point for libdispatch to ask for threads
 */
static int
wqops_queue_reqthreads(struct proc *p, int reqcount,
		pthread_priority_t priority)
{
	bool overcommit = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
	bool event_manager = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
	int class = event_manager ? WORKQUEUE_EVENT_MANAGER_BUCKET :
			pthread_priority_get_class_index(priority);

	if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) ||
			(overcommit && event_manager)) {
		return EINVAL;
	}

	struct workqueue *wq;
	if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	workqueue_lock_spin(wq);
	_threadreq_copy_prepare(wq);

	PTHREAD_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE, wq, reqcount, priority, 0, 0);

	int tr_flags = 0;
	if (overcommit) tr_flags |= TR_FLAG_OVERCOMMIT;
	if (reqcount > 1) {
		/*
		 * when libdispatch asks for more than one thread, it wants to achieve
		 * parallelism. Pacing would be detrimental to this ask, so treat
		 * these specially to not do the pacing admission check
		 */
		tr_flags |= TR_FLAG_NO_PACING;
	}

	while (reqcount-- && !_wq_exiting(wq)) {
		struct threadreq req;
		_threadreq_init_stack(&req, class, tr_flags);

		workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, true);

		workqueue_lock_spin(wq); /* reacquire */
		_threadreq_copy_prepare(wq);
	}

	workqueue_unlock(wq);

	return 0;
}
/*
 * Used by the kevent system to request threads.
 *
 * Currently count is ignored and we always return one thread per invocation.
 */
static thread_t
_workq_kevent_reqthreads(struct proc *p, pthread_priority_t priority,
		bool no_emergency)
{
	int wq_run_tr = WQ_RUN_TR_THROTTLED;
	bool emergency_thread = false;
	struct threadreq req;

	struct workqueue *wq;
	if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
		return THREAD_NULL;
	}

	int class = pthread_priority_get_class_index(priority);

	workqueue_lock_spin(wq);
	bool has_threadreq = _threadreq_copy_prepare_noblock(wq);

	PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, NULL, priority, 0, 0);

	/*
	 * Skip straight to event manager if that's what was requested
	 */
	if ((_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED) ||
			(_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)){
		goto event_manager;
	}

	bool will_pace = _wq_should_pace_priority(wq, class);
	if ((wq->wq_thidlecount == 0 || will_pace) && has_threadreq == false) {
		/*
		 * We'll need to persist the request and can't, so return the emergency
		 * thread instead, which has a persistent request object.
		 */
		emergency_thread = true;
		goto event_manager;
	}

	/*
	 * Handle overcommit requests
	 */
	if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
		_threadreq_init_stack(&req, class, TR_FLAG_KEVENT | TR_FLAG_OVERCOMMIT);
		wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
		goto done;
	}

	/*
	 * Handle constrained requests
	 */
	boolean_t may_start = may_start_constrained_thread(wq, class, NULL, false);
	if (may_start || no_emergency) {
		_threadreq_init_stack(&req, class, TR_FLAG_KEVENT);
		wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
		goto done;
	} else {
		emergency_thread = true;
	}

event_manager:
	_threadreq_init_stack(&req, WORKQUEUE_EVENT_MANAGER_BUCKET, TR_FLAG_KEVENT);
	wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);

done:
	if (wq_run_tr == WQ_RUN_TR_THREAD_NEEDED && WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
		workqueue_interval_timer_trigger(wq);
	}
	return emergency_thread ? (void*)-1 : 0;
}
thread_t
_workq_reqthreads(struct proc *p, __assert_only int requests_count,
		workq_reqthreads_req_t request)
{
	assert(requests_count == 1);

	pthread_priority_t priority = request->priority;
	bool no_emergency = request->count & WORKQ_REQTHREADS_NOEMERGENCY;

	return _workq_kevent_reqthreads(p, priority, no_emergency);
}
int
workq_kern_threadreq(struct proc *p, workq_threadreq_t _req,
		enum workq_threadreq_type type, unsigned long priority, int flags)
{
	struct workqueue *wq;
	int ret;

	if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	switch (type) {
	case WORKQ_THREADREQ_KEVENT: {
		bool no_emergency = flags & WORKQ_THREADREQ_FLAG_NOEMERGENCY;
		(void)_workq_kevent_reqthreads(p, priority, no_emergency);
		return 0;
	}
	case WORKQ_THREADREQ_WORKLOOP:
	case WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL: {
		struct threadreq *req = (struct threadreq *)_req;
		int req_class = pthread_priority_get_class_index(priority);
		int req_flags = TR_FLAG_WORKLOOP;
		if ((_pthread_priority_get_flags(priority) &
				_PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
			req_flags |= TR_FLAG_OVERCOMMIT;
		}

		thread_t thread = current_thread();
		struct threadlist *tl = util_get_thread_threadlist_entry(thread);

		if (tl && tl != WQ_THREADLIST_EXITING_POISON &&
				(tl->th_flags & TH_LIST_UNBINDING)) {
			/*
			 * we're called back synchronously from the context of
			 * kevent_qos_internal_unbind from within wqops_thread_return()
			 * we can try to match up this thread with this request !
			 */
		} else {
			tl = NULL;
		}

		_threadreq_init_alloced(req, req_class, req_flags);
		workqueue_lock_spin(wq);
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, priority, 1, 0);
		ret = workqueue_run_threadreq_and_unlock(p, wq, tl, req, false);
		if (ret == WQ_RUN_TR_EXITING) {
			return ECANCELED;
		}
		if (ret == WQ_RUN_TR_THREAD_NEEDED) {
			if (type == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL) {
				return EAGAIN;
			}
			if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
				workqueue_interval_timer_trigger(wq);
			}
		}
		return 0;
	}
	case WORKQ_THREADREQ_REDRIVE:
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, 0, 0, 4, 0);
		workqueue_lock_spin(wq);
		ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
		if (ret == WQ_RUN_TR_EXITING) {
			return EINVAL;
		}
		return 0;
	default:
		return ENOTSUP;
	}
}
int
workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t _req,
		enum workq_threadreq_op operation, unsigned long arg1,
		unsigned long __unused arg2)
{
	struct threadreq *req = (struct threadreq *)_req;
	struct workqueue *wq;
	int priclass, ret = 0, wq_tr_rc = WQ_RUN_TR_THROTTLED;

	if (req == NULL || (wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	workqueue_lock_spin(wq);

	if (_wq_exiting(wq)) {
		ret = ECANCELED;
		goto out_unlock;
	}

	/*
	 * Find/validate the referenced request structure
	 */
	if (req->tr_state != TR_STATE_WAITING) {
		ret = EINVAL;
		goto out_unlock;
	}
	assert(req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET);
	assert(req->tr_flags & TR_FLAG_WORKLOOP);

	switch (operation) {
	case WORKQ_THREADREQ_CHANGE_PRI:
	case WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL:
		priclass = pthread_priority_get_class_index(arg1);
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, arg1, 2, 0);
		if (req->tr_priority == priclass) {
			goto out_unlock;
		}
		_threadreq_dequeue(wq, req);
		req->tr_priority = priclass;
		req->tr_state = TR_STATE_NEW; // what was old is new again
		wq_tr_rc = workqueue_run_threadreq_and_unlock(p, wq, NULL, req, false);
		goto out;

	case WORKQ_THREADREQ_CANCEL:
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, 0, 3, 0);
		_threadreq_dequeue(wq, req);
		req->tr_state = TR_STATE_DEAD;
		break;

	default:
		ret = ENOTSUP;
		break;
	}

out_unlock:
	workqueue_unlock(wq);
out:
	if (wq_tr_rc == WQ_RUN_TR_THREAD_NEEDED) {
		if (operation == WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL) {
			ret = EAGAIN;
		} else if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
			workqueue_interval_timer_trigger(wq);
		}
	}
	return ret;
}
static int
wqops_thread_return(struct proc *p, struct workqueue *wq)
{
	thread_t th = current_thread();
	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);

	/* reset signal mask on the workqueue thread to default state */
	if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
		pthread_kern->proc_lock(p);
		pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
		pthread_kern->proc_unlock(p);
	}

	if (wq == NULL || !tl) {
		return EINVAL;
	}

	PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0);

	/*
	 * This squash call has neat semantics: it removes the specified overrides,
	 * replacing the current requested QoS with the previous effective QoS from
	 * those overrides.  This means we won't be preempted due to having our QoS
	 * lowered.  Of course, now our understanding of the thread's QoS is wrong,
	 * so we'll adjust below.
	 */
	bool was_manager = (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
	int new_qos = 0;

	if (!was_manager) {
		new_qos = pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th,
				THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD,
				THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
	}

	PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, tl->th_priority, 0, 0, 0);

	workqueue_lock_spin(wq);

	if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
		unsigned int flags = KEVENT_FLAG_WORKQ;
		if (was_manager) {
			flags |= KEVENT_FLAG_WORKQ_MANAGER;
		}

		tl->th_flags |= TH_LIST_UNBINDING;
		workqueue_unlock(wq);
		kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags);
		if (!(tl->th_flags & TH_LIST_UNBINDING)) {
			_setup_wqthread(p, th, wq, tl, WQ_SETUP_CLEAR_VOUCHER);
			pthread_kern->unix_syscall_return(EJUSTRETURN);
			__builtin_unreachable();
		}
		workqueue_lock_spin(wq);
		tl->th_flags &= ~(TH_LIST_KEVENT_BOUND | TH_LIST_UNBINDING);
	}

	if (!was_manager) {
		/* Fix up counters from the squash operation. */
		uint8_t old_bucket = tl->th_priority;
		uint8_t new_bucket = thread_qos_get_class_index(new_qos);

		if (old_bucket != new_bucket) {
			_wq_thactive_move(wq, old_bucket, new_bucket);
			wq->wq_thscheduled_count[old_bucket]--;
			wq->wq_thscheduled_count[new_bucket]++;

			PTHREAD_TRACE_WQ(TRACE_wq_thread_squash | DBG_FUNC_NONE, wq, tl->th_priority, new_bucket, 0, 0);
			tl->th_priority = new_bucket;
		}
	}

	PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0);

	workqueue_run_threadreq_and_unlock(p, wq, tl, NULL, false);
	return EJUSTRETURN;
}
/*
 * Multiplexed call to interact with the workqueue mechanism
 */
int
_workq_kernreturn(struct proc *p,
		int options,
		user_addr_t item,
		int arg2,
		int arg3,
		int32_t *retval)
{
	struct workqueue *wq;
	int error = 0;

	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	switch (options) {
	case WQOPS_QUEUE_NEWSPISUPP: {
		/*
		 * arg2 = offset of serialno into dispatch queue
		 * arg3 = kevent support
		 */
		int offset = arg2;
		if (arg3 & 0x01){
			// If we get here, then userspace has indicated support for kevent delivery.
		}

		pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
		break;
	}
	case WQOPS_QUEUE_REQTHREADS: {
		/*
		 * arg2 = number of threads to start
		 * arg3 = priority
		 */
		error = wqops_queue_reqthreads(p, arg2, arg3);
		break;
	}
	case WQOPS_SET_EVENT_MANAGER_PRIORITY: {
		/*
		 * arg2 = priority for the manager thread
		 *
		 * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the
		 * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead
		 * of a QOS value
		 */
		pthread_priority_t pri = arg2;

		wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
		if (wq == NULL) {
			error = EINVAL;
			break;
		}
		workqueue_lock_spin(wq);
		if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
			/*
			 * If userspace passes a scheduling priority, that takes precedence
			 * over any QoS.  (So, userspace should take care not to accidentally
			 * lower the priority this way.)
			 */
			uint32_t sched_pri = pri & _PTHREAD_PRIORITY_SCHED_PRI_MASK;
			if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
				wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_MASK)
						| _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
			} else {
				wq->wq_event_manager_priority = sched_pri
						| _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
			}
		} else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
			int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority);
			int new_qos = pthread_priority_get_thread_qos(pri);
			wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
		}
		workqueue_unlock(wq);
		break;
	}
	case WQOPS_THREAD_KEVENT_RETURN:
	case WQOPS_THREAD_WORKLOOP_RETURN:
		wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
		PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0);
		if (item != 0 && arg2 != 0) {
			int32_t kevent_retval;
			int ret;
			if (options == WQOPS_THREAD_KEVENT_RETURN) {
				ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL,
						KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS,
						&kevent_retval);
			} else /* options == WQOPS_THREAD_WORKLOOP_RETURN */ {
				kqueue_id_t kevent_id = -1;
				ret = kevent_id_internal(p, &kevent_id, item, arg2, item, arg2,
						NULL, NULL,
						KEVENT_FLAG_WORKLOOP | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS,
						&kevent_retval);
			}
			/*
			 * We shouldn't be getting more errors out than events we put in, so
			 * reusing the input buffer should always provide enough space.  But,
			 * the assert is commented out since we get errors in edge cases in the
			 * process lifecycle.
			 */
			//assert(ret == KERN_SUCCESS && kevent_retval >= 0);
			if (ret != KERN_SUCCESS){
				error = ret;
				break;
			} else if (kevent_retval > 0){
				assert(kevent_retval <= arg2);
				*retval = kevent_retval;
				error = 0;
				break;
			}
		}
		goto thread_return;

	case WQOPS_THREAD_RETURN:
		wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
		PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0);
	thread_return:
		error = wqops_thread_return(p, wq);
		// NOT REACHED except in case of error
		assert(error);
		break;

	case WQOPS_SHOULD_NARROW: {
		/*
		 * arg2 = priority to test
		 */
		pthread_priority_t priority = arg2;
		thread_t th = current_thread();
		struct threadlist *tl = util_get_thread_threadlist_entry(th);

		if (tl == NULL || (tl->th_flags & TH_LIST_CONSTRAINED) == 0) {
			error = EINVAL;
			break;
		}

		int class = pthread_priority_get_class_index(priority);
		wq = tl->th_workq;
		workqueue_lock_spin(wq);
		bool should_narrow = !may_start_constrained_thread(wq, class, tl, false);
		workqueue_unlock(wq);

		*retval = should_narrow;
		break;
	}
	default:
		error = EINVAL;
		break;
	}

	switch (options) {
	case WQOPS_THREAD_KEVENT_RETURN:
	case WQOPS_THREAD_WORKLOOP_RETURN:
	case WQOPS_THREAD_RETURN:
		PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, options, 0, 0, 0);
		break;
	}
	return (error);
}
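/*
 * Illustrative sketch (not part of the kernel build): the "never lower the
 * event manager" merge from WQOPS_SET_EVENT_MANAGER_PRIORITY, with simplified
 * encodings.  SCHED_PRI_FLAG and PRI_MASK are stand-ins for the
 * pthread_priority_t bit layout; the real code also re-ORs the event-manager
 * flag, which is omitted here.
 */
#if 0
#include <assert.h>
#include <stdint.h>

#define SCHED_PRI_FLAG 0x80000000u
#define PRI_MASK       0x0000ffffu

static uint32_t
merge_manager_pri(uint32_t current, uint32_t incoming)
{
	if (incoming & SCHED_PRI_FLAG) {
		uint32_t sched = incoming & PRI_MASK;
		if (current & SCHED_PRI_FLAG)
			sched = sched > (current & PRI_MASK) ? sched : (current & PRI_MASK);
		return sched | SCHED_PRI_FLAG;
	}
	if ((current & SCHED_PRI_FLAG) == 0) {
		/* both are QoS-based: keep the higher QoS */
		uint32_t qos = incoming & PRI_MASK;
		uint32_t cur = current & PRI_MASK;
		return (qos > cur ? qos : cur);
	}
	return current; /* an explicit sched pri already set wins over a QoS request */
}

static void
merge_example(void)
{
	assert(merge_manager_pri(4, 6) == 6);                            /* QoS raised */
	assert(merge_manager_pri(6, 4) == 6);                            /* never lowered */
	assert(merge_manager_pri(6, SCHED_PRI_FLAG | 60) == (SCHED_PRI_FLAG | 60));
	assert(merge_manager_pri(SCHED_PRI_FLAG | 60, 6) == (SCHED_PRI_FLAG | 60));
}
#endif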
/*
 * We have no work to do, park ourselves on the idle list.
 *
 * Consumes the workqueue lock and does not return.
 */
static void
parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread)
{
	assert(thread == tl->th_thread);
	assert(thread == current_thread());

	PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq, 0, 0, 0, 0);

	uint32_t us_to_wait = 0;

	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

	tl->th_flags &= ~TH_LIST_RUNNING;
	tl->th_flags &= ~TH_LIST_KEVENT;
	assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);

	if (tl->th_flags & TH_LIST_CONSTRAINED) {
		wq->wq_constrained_threads_scheduled--;
		tl->th_flags &= ~TH_LIST_CONSTRAINED;
	}

	_wq_thactive_dec(wq, tl->th_priority);
	wq->wq_thscheduled_count[tl->th_priority]--;
	wq->wq_threads_scheduled--;
	uint32_t thidlecount = ++wq->wq_thidlecount;

	pthread_kern->thread_sched_call(thread, NULL);

	/*
	 * We'd like to always have one manager thread parked so that we can have
	 * low latency when we need to bring a manager thread up.  If that idle
	 * thread list is empty, make this thread a manager thread.
	 *
	 * XXX: This doesn't check that there's not a manager thread outstanding,
	 * so it's based on the assumption that most manager callouts will change
	 * their QoS before parking.  If that stops being true, this may end up
	 * costing us more than we gain.
	 */
	if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
			tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){
		PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
				wq, thread_tid(thread),
				(tl->th_priority << 16) | WORKQUEUE_EVENT_MANAGER_BUCKET, 2, 0);
		reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET));
		tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET;
	}

	if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
		TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry);
	} else {
		TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
	}

	/*
	 * When we remove the voucher from the thread, we may lose our importance
	 * causing us to get preempted, so we do this after putting the thread on
	 * the idle list.  That way, when we get our importance back we'll be able
	 * to use this thread from e.g. the kevent call out to deliver a boosting
	 * message.
	 */
	tl->th_flags |= TH_LIST_REMOVING_VOUCHER;
	workqueue_unlock(wq);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(tl->th_thread);
	}
	__assert_only kern_return_t kr;
	kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
	assert(kr == KERN_SUCCESS);
	workqueue_lock_spin(wq);
	tl->th_flags &= ~(TH_LIST_REMOVING_VOUCHER);

	if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
		if (thidlecount < 101) {
			us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount - 2) * (wq_reduce_pool_window_usecs / 100));
		} else {
			us_to_wait = wq_reduce_pool_window_usecs / 100;
		}

		thread_set_pending_block_hint(thread, kThreadWaitParkedWorkQueue);
		assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
				TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
				wq_reduce_pool_window_usecs/10, NSEC_PER_USEC);

		workqueue_unlock(wq);

		thread_block(wq_unpark_continue);
		panic("thread_block(wq_unpark_continue) returned!");
	} else {
		workqueue_unlock(wq);

		/*
		 * While we'd dropped the lock to unset our voucher, someone came
		 * around and made us runnable.  But because we weren't waiting on the
		 * event their wakeup() was ineffectual.  To correct for that, we just
		 * run the continuation ourselves.
		 */
		wq_unpark_continue(NULL, THREAD_AWAKENED);
	}
}
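/*
 * Illustrative sketch (not part of the kernel build): the idle-pool shrink
 * delay computed in parkit() above.  The more idle threads there are, the
 * shorter each one waits before self-destructing; past 100 idle threads
 * everyone waits only 1% of the window.  The window value is a hypothetical
 * stand-in for wq_reduce_pool_window_usecs.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t
park_wait_usecs(uint32_t window_usecs, uint32_t thidlecount)
{
	if (thidlecount < 101) {
		return window_usecs - ((thidlecount - 2) * (window_usecs / 100));
	}
	return window_usecs / 100;
}

static void
park_wait_example(void)
{
	const uint32_t window = 5000000; /* 5 seconds, in microseconds */
	assert(park_wait_usecs(window, 2)   == 5000000); /* a lone extra idle thread waits the full window */
	assert(park_wait_usecs(window, 51)  == 2550000); /* roughly half the window */
	assert(park_wait_usecs(window, 200) == 50000);   /* deep pool: 1% of the window */
}
#endif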
static boolean_t
may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass,
		struct threadlist *tl, bool may_start_timer)
{
	uint32_t req_qos = _wq_thactive_best_constrained_req_qos(wq);
	wq_thactive_t thactive;

	if (may_start_timer && at_priclass < req_qos) {
		/*
		 * When called from workqueue_run_threadreq_and_unlock() pre-post newest
		 * higher priorities into the thactive state so that
		 * workqueue_callback() takes the right decision.
		 *
		 * If the admission check passes, workqueue_run_threadreq_and_unlock
		 * will reset this value before running the request.
		 */
		thactive = _wq_thactive_set_best_constrained_req_qos(wq, req_qos,
				at_priclass);
		PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 1, (uint64_t)thactive,
				(uint64_t)(thactive >> 64), 0, 0);
	} else {
		thactive = _wq_thactive(wq);
	}

	uint32_t constrained_threads = wq->wq_constrained_threads_scheduled;
	if (tl && (tl->th_flags & TH_LIST_CONSTRAINED)) {
		/*
		 * don't count the current thread as scheduled
		 */
		constrained_threads--;
	}
	if (constrained_threads >= wq_max_constrained_threads) {
		PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1,
				wq->wq_constrained_threads_scheduled,
				wq_max_constrained_threads, 0);
		/*
		 * we need 1 or more constrained threads to return to the kernel before
		 * we can dispatch additional work
		 */
		return FALSE;
	}

	/*
	 * Compute a metric for how many threads are active.  We find the
	 * highest priority request outstanding and then add up the number of
	 * active threads in that and all higher-priority buckets.  We'll also add
	 * any "busy" threads which are not active but blocked recently enough that
	 * we can't be sure they've gone idle yet.  We'll then compare this metric
	 * to our max concurrency to decide whether to add a new thread.
	 */
	uint32_t busycount, thactive_count;

	thactive_count = _wq_thactive_aggregate_downto_qos(wq, thactive,
			at_priclass, &busycount, NULL);

	if (tl && tl->th_priority <= at_priclass) {
		/*
		 * don't count this thread as currently active
		 */
		assert(thactive_count > 0);
		thactive_count--;
	}

	if (thactive_count + busycount < wq_max_concurrency[at_priclass]) {
		PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2,
				thactive_count, busycount, 0);
		return TRUE;
	}
	PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3,
			thactive_count, busycount, 0);

	if (busycount && may_start_timer) {
		/*
		 * If this is called from the add timer, we won't have another timer
		 * fire when the thread exits the "busy" state, so rearm the timer.
		 */
		if (WQ_TIMER_DELAYED_NEEDED(wq)) {
			workqueue_interval_timer_start(wq);
		}
	}

	return FALSE;
}
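/*
 * Illustrative sketch (not part of the kernel build): the admission metric
 * used above, reduced to plain integers.  A constrained request at some class
 * is admitted when the number of threads active at that class and above, plus
 * the threads still considered "busy", stays under the per-class concurrency
 * limit.  Values are hypothetical.
 */
#if 0
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
admit_constrained(uint32_t thactive_count, uint32_t busycount, uint32_t max_concurrency)
{
	return thactive_count + busycount < max_concurrency;
}

static void
admission_example(void)
{
	const uint32_t ncpu = 4;                /* stand-in for wq_max_concurrency[class] */
	assert( admit_constrained(2, 1, ncpu)); /* 3 of 4 "slots" accounted for: admit */
	assert(!admit_constrained(3, 1, ncpu)); /* would reach the limit: hold the request */
	assert(!admit_constrained(4, 0, ncpu)); /* already saturated by active threads */
}
#endif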
static struct threadlist *
pop_from_thidlelist(struct workqueue *wq, uint32_t priclass)
{
	assert(wq->wq_thidlecount);

	struct threadlist *tl = NULL;

	if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
			(priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){
		tl = TAILQ_FIRST(&wq->wq_thidlemgrlist);
		TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
		assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
	} else if (!TAILQ_EMPTY(&wq->wq_thidlelist) &&
			(priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){
		tl = TAILQ_FIRST(&wq->wq_thidlelist);
		TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
		assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
	} else {
		panic("pop_from_thidlelist called with no threads available");
	}
	assert((tl->th_flags & TH_LIST_RUNNING) == 0);

	assert(wq->wq_thidlecount);
	wq->wq_thidlecount--;

	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;

	wq->wq_threads_scheduled++;
	wq->wq_thscheduled_count[priclass]++;
	_wq_thactive_inc(wq, priclass);

	return tl;
}
static pthread_priority_t
pthread_priority_from_wq_class_index(struct workqueue *wq, int index)
{
	if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){
		return wq->wq_event_manager_priority;
	} else {
		return class_index_get_pthread_priority(index);
	}
}
static void
reset_priority(struct threadlist *tl, pthread_priority_t pri)
{
	kern_return_t ret;
	thread_t th = tl->th_thread;

	if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
		ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

		if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) {

			/* Reset priority to default (masked by QoS) */

			ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE);
			assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

			tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI;
		}
	} else {
		ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
		ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

		tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI;
	}
}
/*
 * Picks the best request to run, and returns the best overcommit fallback
 * if the best pick is non overcommit and risks failing its admission check.
 */
static struct threadreq *
workqueue_best_threadreqs(struct workqueue *wq, struct threadlist *tl,
		struct threadreq **fallback)
{
	struct threadreq *req, *best_req = NULL;
	int priclass, prilimit;

	if ((wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) &&
			((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) ||
			(tl && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
		/*
		 * There's an event manager request and either:
		 *   - no event manager currently running
		 *   - we are re-using the event manager
		 */
		req = &wq->wq_event_manager_threadreq;
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 1, 0, 0);
		return req;
	}

	if (tl) {
		prilimit = WORKQUEUE_EVENT_MANAGER_BUCKET;
	} else {
		prilimit = _wq_highest_paced_priority(wq);
	}
	for (priclass = 0; priclass < prilimit; priclass++) {
		req = TAILQ_FIRST(&wq->wq_overcommit_reqlist[priclass]);
		if (req) {
			PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 2, 0, 0);
			if (best_req) {
				*fallback = req;
			} else {
				best_req = req;
			}
			break;
		}
		if (!best_req) {
			best_req = TAILQ_FIRST(&wq->wq_reqlist[priclass]);
			if (best_req) {
				PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, best_req, 3, 0, 0);
			}
		}
	}
	return best_req;
}
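/*
 * Illustrative sketch (not part of the kernel build): the selection order
 * implemented by workqueue_best_threadreqs(), expressed over simple arrays.
 * The event manager request wins outright when it is runnable; otherwise the
 * highest-priority overcommit request is preferred, with the highest-priority
 * constrained request kept as the pick and an overcommit request remembered as
 * fallback for the admission check.  Types and helpers here are hypothetical.
 */
#if 0
#include <stdbool.h>
#include <stddef.h>

#define NUM_CLASSES 5

struct sketch_req { int dummy; };

static struct sketch_req *
best_request(struct sketch_req *mgr_req, bool mgr_can_run,
		struct sketch_req *overcommit[NUM_CLASSES],
		struct sketch_req *constrained[NUM_CLASSES],
		struct sketch_req **fallback)
{
	if (mgr_req && mgr_can_run)
		return mgr_req;

	struct sketch_req *best = NULL;
	for (int class = 0; class < NUM_CLASSES; class++) {
		if (overcommit[class]) {
			if (best)
				*fallback = overcommit[class]; /* constrained pick may still fail admission */
			else
				best = overcommit[class];
			break;
		}
		if (!best)
			best = constrained[class];
	}
	return best;
}
#endif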
/*
 * Runs a thread request on a thread
 *
 * - if thread is THREAD_NULL, will find a thread and run the request there.
 *   Otherwise, the thread must be the current thread.
 *
 * - if req is NULL, will find the highest priority request and run that.  If
 *   it is not NULL, it must be a threadreq object in state NEW.  If it can not
 *   be run immediately, it will be enqueued and moved to state WAITING.
 *
 *   Either way, the thread request object serviced will be moved to state
 *   PENDING and attached to the threadlist.
 *
 * Should be called with the workqueue lock held.  Will drop it.
 *
 * WARNING: _workq_kevent_reqthreads needs to be able to preflight any
 * admission checks in this function.  If you are changing this function,
 * keep that one up-to-date.
 *
 * - if parking_tl is non NULL, then the current thread is parking. This will
 *   try to reuse this thread for a request. If no match is found, it will be
 *   parked.
 */
static int
workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
		struct threadlist *parking_tl, struct threadreq *req,
		bool may_add_new_thread)
{
	struct threadreq *incoming_req = req;

	struct threadlist *tl = parking_tl;
	int rc = WQ_RUN_TR_THROTTLED;

	assert(tl == NULL || tl->th_thread == current_thread());
	assert(req == NULL || req->tr_state == TR_STATE_NEW);
	assert(!may_add_new_thread || !tl);

	PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq | DBG_FUNC_START, wq, req,
			tl ? thread_tid(tl->th_thread) : 0,
			req ? (req->tr_priority << 16 | req->tr_flags) : 0, 0);

	/*
	 * Special cases when provided an event manager request
	 */
	if (req && req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
		// Clients must not rely on identity of event manager requests
		assert(req->tr_flags & TR_FLAG_ONSTACK);
		// You can't be both overcommit and event manager
		assert((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0);

		/*
		 * We can only ever have one event manager request, so coalesce them if
		 * there's already one outstanding.
		 */
		if (wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) {
			PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_mgr_merge | DBG_FUNC_NONE, wq, req, 0, 0, 0);

			struct threadreq *existing_req = &wq->wq_event_manager_threadreq;
			if (req->tr_flags & TR_FLAG_KEVENT) {
				existing_req->tr_flags |= TR_FLAG_KEVENT;
			}

			req = existing_req;
			incoming_req = NULL;
		}

		if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] &&
				(!tl || tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET)){
			/*
			 * There can only be one event manager running at a time.
			 */
			PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 1, 0, 0, 0);
			goto done;
		}
	}

again: // Start again after creating a thread

	if (_wq_exiting(wq)) {
		rc = WQ_RUN_TR_EXITING;
		goto exiting;
	}

	/*
	 * Thread request selection and admission control
	 */
	struct threadreq *fallback = NULL;
	if (req) {
		if ((req->tr_flags & TR_FLAG_NO_PACING) == 0 &&
				_wq_should_pace_priority(wq, req->tr_priority)) {
			/*
			 * If a request fails the pacing admission check, then thread
			 * requests are redriven when the pacing thread is finally scheduled
			 * when it calls _wq_pacing_end() in wq_unpark_continue().
			 */
			goto done;
		}
	} else if (wq->wq_reqcount == 0) {
		PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 2, 0, 0, 0);
		goto done;
	} else if ((req = workqueue_best_threadreqs(wq, tl, &fallback)) == NULL) {
		PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 3, 0, 0, 0);
		goto done;
	}

	if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0 &&
			(req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET)) {
		if (!may_start_constrained_thread(wq, req->tr_priority, parking_tl, true)) {
			if (!fallback) {
				PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 4, 0, 0, 0);
				goto done;
			}
			assert(req->tr_state == TR_STATE_WAITING);
			req = fallback;
		}
	}

	if (parking_tl) {
		if (tl->th_priority != req->tr_priority) {
			_wq_thactive_move(wq, tl->th_priority, req->tr_priority);
			wq->wq_thscheduled_count[tl->th_priority]--;
			wq->wq_thscheduled_count[req->tr_priority]++;
		}
		PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
				wq, 1, thread_tid(tl->th_thread), 0, 0);
	} else if (wq->wq_thidlecount) {
		tl = pop_from_thidlelist(wq, req->tr_priority);
		/*
		 * This call will update wq_thscheduled_count and wq_thactive_count for
		 * the provided priority.  It will not set the returned thread to that
		 * priority.  This matches the behavior of the parking_tl clause above.
		 */
		PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
				wq, 2, thread_tid(tl->th_thread), 0, 0);
	} else /* no idle threads */ {
		if (!may_add_new_thread || wq->wq_nthreads >= wq_max_threads) {
			PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 5,
					may_add_new_thread, wq->wq_nthreads, 0);
			if (wq->wq_nthreads < wq_max_threads) {
				rc = WQ_RUN_TR_THREAD_NEEDED;
			}
			goto done;
		}

		bool added_thread = workqueue_addnewthread(p, wq);
		/*
		 * workqueue_addnewthread will drop and re-take the lock, so we
		 * need to ensure we still have a cached request.
		 *
		 * It also means we have to pick a new request, since our old pick may
		 * not be valid anymore.
		 */
		req = incoming_req;
		if (req && (req->tr_flags & TR_FLAG_ONSTACK)) {
			_threadreq_copy_prepare(wq);
		}

		if (added_thread) {
			PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
					wq, 3, 0, 0, 0);
			goto again;
		} else if (_wq_exiting(wq)) {
			rc = WQ_RUN_TR_EXITING;
			goto exiting;
		} else {
			PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 6, 0, 0, 0);
			/*
			 * Something caused thread creation to fail.  Kick off the timer in
			 * the hope that it'll succeed next time.
			 */
			if (WQ_TIMER_DELAYED_NEEDED(wq)) {
				workqueue_interval_timer_start(wq);
			}
			goto done;
		}
	}

	/*
	 * Setup thread, mark request as complete and run with it.
	 */
	if (req->tr_state == TR_STATE_WAITING) {
		_threadreq_dequeue(wq, req);
	}
	if (tl->th_priority != req->tr_priority) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
				wq, thread_tid(tl->th_thread),
				(tl->th_priority << 16) | req->tr_priority, 1, 0);
		reset_priority(tl, pthread_priority_from_wq_class_index(wq, req->tr_priority));
		tl->th_priority = (uint8_t)req->tr_priority;
	}
	if (req->tr_flags & TR_FLAG_OVERCOMMIT) {
		if ((tl->th_flags & TH_LIST_CONSTRAINED) != 0) {
			tl->th_flags &= ~TH_LIST_CONSTRAINED;
			wq->wq_constrained_threads_scheduled--;
		}
	} else {
		if ((tl->th_flags & TH_LIST_CONSTRAINED) == 0) {
			tl->th_flags |= TH_LIST_CONSTRAINED;
			wq->wq_constrained_threads_scheduled++;
		}
	}

	if (!parking_tl && !(req->tr_flags & TR_FLAG_NO_PACING)) {
		_wq_pacing_start(wq, tl);
	}
	if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) {
		uint32_t old_qos, new_qos;

		/*
		 * If we are scheduling a constrained thread request, we may need to
		 * update the best constrained qos in the thactive atomic state.
		 */
		for (new_qos = 0; new_qos < WQ_THACTIVE_NO_PENDING_REQUEST; new_qos++) {
			if (TAILQ_FIRST(&wq->wq_reqlist[new_qos]))
				break;
		}
		old_qos = _wq_thactive_best_constrained_req_qos(wq);
		if (old_qos != new_qos) {
			wq_thactive_t v = _wq_thactive_set_best_constrained_req_qos(wq,
					old_qos, new_qos);
#ifdef __LP64__
			PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, (uint64_t)v,
					(uint64_t)(v >> 64), 0, 0);
#else
			PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, v, 0, 0, 0);
#endif
		}
	}

	uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI;
	if (req->tr_flags & TR_FLAG_OVERCOMMIT)
		upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
	if (req->tr_flags & TR_FLAG_KEVENT)
		upcall_flags |= WQ_FLAG_THREAD_KEVENT;
	if (req->tr_flags & TR_FLAG_WORKLOOP)
		upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT;
	if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET)
		upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
	tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT;

	if (req->tr_flags & TR_FLAG_KEVENT) {
		tl->th_flags |= TH_LIST_KEVENT;
	} else {
		tl->th_flags &= ~TH_LIST_KEVENT;
	}
	return _threadreq_complete_and_unlock(p, wq, req, tl);

done:
	if (incoming_req) {
		_threadreq_enqueue(wq, incoming_req);
	}

exiting:

	if (parking_tl && !(parking_tl->th_flags & TH_LIST_UNBINDING)) {
		parkit(wq, parking_tl, parking_tl->th_thread);
		__builtin_unreachable();
	}

	workqueue_unlock(wq);

	return rc;
}
/*
 * parked thread wakes up
 */
static void
wq_unpark_continue(void* __unused ptr, wait_result_t wait_result)
{
	boolean_t first_use = false;
	thread_t th = current_thread();
	proc_t p = current_proc();

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	if (uth == NULL) goto done;

	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
	if (wq == NULL) goto done;

	workqueue_lock_spin(wq);

	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
	assert(tl != WQ_THREADLIST_EXITING_POISON);
	if (tl == NULL) {
		/*
		 * We woke up before addnewthread() was finished setting us up.  Go
		 * ahead and exit, but before we do poison the threadlist variable so
		 * that addnewthread() doesn't think we are valid still.
		 */
		pthread_kern->uthread_set_threadlist(uth, WQ_THREADLIST_EXITING_POISON);
		workqueue_unlock(wq);

		goto done;
	}

	assert(tl->th_flags & TH_LIST_INITED);

	if ((tl->th_flags & TH_LIST_NEW)){
		tl->th_flags &= ~(TH_LIST_NEW);
		first_use = true;
	}

	if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
		/*
		 * The normal wakeup path.
		 */
		goto return_to_user;
	}

	if ((tl->th_flags & TH_LIST_RUNNING) == 0 &&
			wait_result == THREAD_TIMED_OUT &&
			tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET &&
			TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl &&
			TAILQ_NEXT(tl, th_entry) == NULL){
		/*
		 * If we are the only idle manager and we pop'ed for self-destruction,
		 * then don't actually exit.  Instead, free our stack to save some
		 * memory and re-park.
		 */

		workqueue_unlock(wq);

		vm_map_t vmap = wq->wq_map;

		// Keep this in sync with _setup_wqthread()
		const vm_size_t guardsize = vm_map_page_size(vmap);
		const user_addr_t freeaddr = (user_addr_t)tl->th_stackaddr + guardsize;
		const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize;

		__assert_only int kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
		if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
			os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
		}

		workqueue_lock_spin(wq);

		if ( !(tl->th_flags & TH_LIST_RUNNING)) {
			thread_set_pending_block_hint(th, kThreadWaitParkedWorkQueue);
			assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));

			workqueue_unlock(wq);

			thread_block(wq_unpark_continue);
			__builtin_unreachable();
		}
	}

	if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
		assert((tl->th_flags & TH_LIST_BUSY) == 0);
		if (!first_use) {
			PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0);
		}
		/*
		 * We were set running, but not for the purposes of actually running.
		 * This could be because the timer elapsed.  Or it could be because the
		 * thread aborted.  Either way, we need to return to userspace to exit.
		 *
		 * The call to workqueue_removethread will consume the lock.
		 */

		if (!first_use &&
				(tl->th_priority < qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS) ||
				(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
			// Reset the QoS to something low for the pthread cleanup
			PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
					wq, thread_tid(th),
					(tl->th_priority << 16) | qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS), 3, 0);
			pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
			reset_priority(tl, cleanup_pri);
		}

		workqueue_removethread(tl, 0, first_use);

		if (first_use){
			pthread_kern->thread_bootstrap_return();
		} else {
			pthread_kern->unix_syscall_return(0);
		}
		__builtin_unreachable();
	}

	/*
	 * The timer woke us up or the thread was aborted.  However, we have
	 * already started to make this a runnable thread.  Wait for that to
	 * finish, then continue to userspace.
	 */
	while ((tl->th_flags & TH_LIST_BUSY)) {
		assert_wait((caddr_t)tl, (THREAD_UNINT));

		workqueue_unlock(wq);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(wq);
	}

return_to_user:
	if (!first_use) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0);
	}
	if (_wq_pacing_end(wq, tl) && wq->wq_reqcount) {
		workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
	} else {
		workqueue_unlock(wq);
	}
	_setup_wqthread(p, th, wq, tl, first_use ? WQ_SETUP_FIRST_USE : 0);
	pthread_kern->thread_sched_call(th, workqueue_callback);
done:
	if (first_use){
		pthread_kern->thread_bootstrap_return();
	} else {
		pthread_kern->unix_syscall_return(EJUSTRETURN);
	}
	panic("Our attempt to return to userspace failed...");
}
/**
 * configures initial thread stack/registers to jump into:
 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s.  Those
 * routines set up a stack frame, using the current stack pointer, and marshal
 * arguments from registers to the stack as required by the ABI.
 *
 * One odd thing we do here is to start the pthread_t 4k below what would be the
 * top of the stack otherwise.  This is because usually only the first 4k of the
 * pthread_t will be used and so we want to put it on the same 16k page as the
 * top of the stack to save memory.
 *
 * When we are done the stack will look like:
 * |-----------| th_stackaddr + th_allocsize
 * |pthread_t  | th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
 * |stack gap  | bottom aligned to 16 bytes, and at least as big as stack_gap_min
 * |   STACK   |
 * |     |     |
 * |guard page | guardsize
 * |-----------| th_stackaddr
 */
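/*
 * Worked example of the layout above, assuming a 16 KB VM page size, a 512 KB
 * PTH_DEFAULT_STACKSIZE and PTHREAD_T_OFFSET == 0 (all per-architecture
 * tunables): guardsize is 16 KB, so the guard page occupies
 * [th_stackaddr, th_stackaddr + 16K), the usable stack grows down from just
 * below th_stackaddr + 528K, and the pthread_t starts at th_stackaddr + 528K,
 * with the optional kevent list and data buffer carved out of the top of the
 * stack immediately below it.
 */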
void
_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
		struct threadlist *tl, int setup_flags)
{
	int error;

	if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) {
		/*
		 * For preemption reasons, we want to reset the voucher as late as
		 * possible, so we do it in two places:
		 *   - Just before parking (i.e. in parkit())
		 *   - Prior to doing the setup for the next workitem (i.e. here)
		 *
		 * Those two places are sufficient to ensure we always reset it before
		 * it goes back out to user space, but be careful not to break that
		 * invariant.
		 */
		__assert_only kern_return_t kr;
		kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
		assert(kr == KERN_SUCCESS);
	}
	uint32_t upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT;
	if (!(setup_flags & WQ_SETUP_FIRST_USE)) {
		upcall_flags |= WQ_FLAG_THREAD_REUSE;
	}

	/*
	 * Put the QoS class value into the lower bits of the reuse_thread register;
	 * this is where the thread priority used to be stored anyway.
	 */
	pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
	upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);
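	/*
	 * Sketch of the upcall_flags word handed to userspace (bit positions are
	 * illustrative; the authoritative values are the WQ_FLAG_* definitions):
	 * the low bits carry the pthread_priority_t QoS value masked by
	 * WQ_FLAG_THREAD_PRIOMASK, while the bits at and above
	 * WQ_FLAG_THREAD_PRIOSHIFT carry the per-thread flags such as
	 * WQ_FLAG_THREAD_REUSE, WQ_FLAG_THREAD_KEVENT and
	 * WQ_FLAG_THREAD_TSD_BASE_SET.
	 */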
	const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
	const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN;
	const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN;

	user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET);
	user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min);
	user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize);
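	/*
	 * Illustrative values for a 64-bit process, assuming the usual x86-64
	 * constants (C_64_REDZONE_LEN == 128, C_64_STK_ALIGN == 16): the initial
	 * user stack pointer is placed at least a full red zone below the
	 * pthread_t and then rounded down to 16 bytes, i.e.
	 *   stack_top_addr = (pthread_self_addr - 128) & ~15ULL;
	 * For a 32-bit process both the gap and the alignment are C_32_STK_ALIGN.
	 */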
	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}
	if (setup_flags & WQ_SETUP_FIRST_USE) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_map_t vmap = pthread_kern->current_map();
		if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
				vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){
			vm_fault( vmap,
					vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
					VM_PROT_READ | VM_PROT_WRITE,
					FALSE,
					THREAD_UNINT, NULL, 0);
		}
		vm_fault( vmap,
				vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)),
				VM_PROT_READ | VM_PROT_WRITE,
				FALSE,
				THREAD_UNINT, NULL, 0);
	}
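	/*
	 * The two vm_fault() calls above are an optimization: by soft-faulting
	 * the initial stack page and the pthread_t page in kernel context, the
	 * brand new thread does not immediately take user-level page faults on
	 * its first few instructions.  The first fault is skipped when both
	 * addresses land on the same VM page (which the layout above tries to
	 * arrange), since faulting it once is enough.
	 */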
	user_addr_t kevent_list = NULL;
	int kevent_count = 0;
	if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
		bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;

		kevent_list = pthread_self_addr - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
		kevent_count = WQ_KEVENT_LIST_LEN;

		user_addr_t kevent_id_addr = kevent_list;
		if (workloop) {
			/*
			 * The kevent ID goes just below the kevent list.  Sufficiently new
			 * userspace will know to look there.  Old userspace will just
			 * ignore it.
			 */
			kevent_id_addr -= sizeof(kqueue_id_t);
		}

		user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
		user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;

		int32_t events_out = 0;

		assert(tl->th_flags & TH_LIST_KEVENT_BOUND);
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
			flags |= KEVENT_FLAG_WORKQ_MANAGER;
		}
		int ret;
		if (workloop) {
			flags |= KEVENT_FLAG_WORKLOOP;
			kqueue_id_t kevent_id = -1;
			ret = kevent_id_internal(p, &kevent_id,
					NULL, 0, kevent_list, kevent_count,
					kevent_data_buf, &kevent_data_available,
					flags, &events_out);
			copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
		} else {
			flags |= KEVENT_FLAG_WORKQ;
			ret = kevent_qos_internal(p,
					class_index_get_thread_qos(tl->th_priority),
					NULL, 0, kevent_list, kevent_count,
					kevent_data_buf, &kevent_data_available,
					flags, &events_out);
		}

		// squash any errors into just empty output
		if (ret != KERN_SUCCESS || events_out == -1){
			events_out = 0;
			kevent_data_available = WQ_KEVENT_DATA_SIZE;
		}

		// We shouldn't get data out if there aren't events available
		assert(events_out != 0 || kevent_data_available == WQ_KEVENT_DATA_SIZE);

		if (events_out > 0){
			if (kevent_data_available == WQ_KEVENT_DATA_SIZE){
				stack_top_addr = (kevent_id_addr - stack_gap_min) & -stack_align_min;
			} else {
				stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min;
			}

			kevent_count = events_out;
		} else {
			kevent_list = NULL;
			kevent_count = 0;
		}
	}
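	/*
	 * Resulting scratch layout, growing down from pthread_self_addr: first
	 * the kevent list (WQ_KEVENT_LIST_LEN entries of struct kevent_qos_s),
	 * then, for workloops only, an 8-byte kqueue_id_t, then up to
	 * WQ_KEVENT_DATA_SIZE bytes of out-of-line kevent data.  Whatever data
	 * space goes unused is handed back to the thread as stack:
	 * stack_top_addr is re-derived just below the deepest byte that was
	 * actually written.
	 */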
	PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, 0, 0, 0, 0);
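	/*
	 * The architecture-specific blocks below seed the new thread's register
	 * state so that, when it returns to userspace, it starts executing at
	 * wqstart_fnptr (the trampoline returned by proc_get_wqthread()) with
	 * the six values documented in the header comment as its arguments.
	 * Only i386/x86_64 are handled in this file, hence the #error fallback.
	 */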
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)pthread_self_addr,
			.ebx = /* arg1 */ (unsigned int)tl->th_thport,
			.ecx = /* arg2 */ (unsigned int)stack_bottom_addr,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)stack_top_addr),
		};

		error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)pthread_self_addr,
			.rsi = (uint64_t)tl->th_thport,
			.rdx = (uint64_t)stack_bottom_addr,
			.rcx = (uint64_t)kevent_list,
			.r8  = (uint64_t)upcall_flags,
			.r9  = (uint64_t)kevent_count,

			.rsp = (uint64_t)(stack_top_addr)
		};

		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	}
#else
#error setup_wqthread not defined for this architecture
#endif
}
#if DEBUG || DEVELOPMENT
static int wq_kevent_test SYSCTL_HANDLER_ARGS {
	//(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
#pragma unused(oidp, arg1, arg2)
	int error;
	struct workq_reqthreads_req_s requests[64] = {};

	if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s))
		return EINVAL;

	error = copyin(req->newptr, requests, req->newlen);
	if (error) return error;

	_workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests);

	return 0;
}
#endif // DEBUG || DEVELOPMENT
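/*
 * Rough userspace sketch for exercising the handler above; the OID symbol
 * suggests the MIB name is "debug.wq_kevent_test" (treat the name and the
 * request contents as assumptions, and note this is only compiled into
 * DEBUG/DEVELOPMENT kernels):
 *
 *	struct workq_reqthreads_req_s reqs[2] = { ... };
 *	if (sysctlbyname("debug.wq_kevent_test", NULL, NULL,
 *			reqs, sizeof(reqs)) == -1) {
 *		perror("wq_kevent_test");
 *	}
 */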
int
_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
{
	struct workqueue * wq;
	int error = 0;
	uint32_t activecount;

	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	/*
	 * This is sometimes called from interrupt context by the kperf sampler.
	 * In that case, it's not safe to spin trying to take the lock since we
	 * might already hold it.  So, we just try-lock it and error out if it's
	 * already held.  Since this is just a debugging aid, and all our callers
	 * are able to handle an error, that's fine.
	 */
	bool locked = workqueue_lock_try(wq);
	if (!locked) {
		return EBUSY;
	}

	activecount = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq),
			WORKQUEUE_NUM_BUCKETS - 1, NULL, NULL);
	pwqinfo->pwq_nthreads = wq->wq_nthreads;
	pwqinfo->pwq_runthreads = activecount;
	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
	pwqinfo->pwq_state = 0;

	if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_nthreads >= wq_max_threads) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

	workqueue_unlock(wq);
	return error;
}
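/*
 * The snapshot filled in above is what userspace sees through proc_info; a
 * minimal sketch of the usual consumer (assuming the standard libproc
 * interface) looks like:
 *
 *	struct proc_workqueueinfo pwqinfo;
 *	int ret = proc_pidinfo(pid, PROC_PIDWORKQUEUEINFO, 0,
 *			&pwqinfo, sizeof(pwqinfo));
 *	if (ret == sizeof(pwqinfo)) {
 *		// inspect pwqinfo.pwq_nthreads, pwqinfo.pwq_state, ...
 *	}
 */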
uint32_t
_get_pwq_state_kdp(proc_t p)
{
	if (p == NULL) {
		return 0;
	}

	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);

	if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) {
		return 0;
	}

	uint32_t pwq_state = WQ_FLAGS_AVAILABLE;

	if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_nthreads >= wq_max_threads) {
		pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

	return pwq_state;
}
int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
void
_pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);

	pthread_zone_workqueue = zinit(sizeof(struct workqueue),
			1024 * sizeof(struct workqueue), 8192, "pthread.workqueue");
	pthread_zone_threadlist = zinit(sizeof(struct threadlist),
			1024 * sizeof(struct threadlist), 8192, "pthread.threadlist");
	pthread_zone_threadreq = zinit(sizeof(struct threadreq),
			1024 * sizeof(struct threadreq), 8192, "pthread.threadreq");

	int policy_bootarg;
	if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
		pthread_mutex_default_policy = policy_bootarg;
	}

	sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_threads);
	sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
	sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
	sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);

#if DEBUG || DEVELOPMENT
	sysctl_register_oid(&sysctl__debug_wq_kevent_test);
#endif

	for (int i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) {
		uint32_t thread_qos = _wq_bucket_to_thread_qos(i);
		wq_max_concurrency[i] = pthread_kern->qos_max_parallelism(thread_qos,
				QOS_PARALLELISM_COUNT_LOGICAL);
	}
	wq_max_concurrency[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
}
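/*
 * Example of the table built above, assuming a machine with 8 logical CPUs
 * and qos_max_parallelism() reporting the logical CPU count for every QoS
 * tier: each regular QoS bucket gets a max concurrency of 8, while the
 * event-manager bucket is pinned to 1 because there is never more than one
 * event manager thread per process.
 */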