/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#pragma mark - Front Matter

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/cdefs.h>

// <rdar://problem/26158937> panic() should be marked noreturn
extern void panic(const char *string, ...) __printflike(1,2) __dead2;
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */
#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#include <mach/shared_region.h>

#include <libkern/OSAtomic.h>
#include <libkern/libkern.h>

#include <sys/pthread_shims.h>
#include "kern_internal.h"
// XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE
#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))

// XXX: Ditto for thread tags from kern/thread.h
#define	THREAD_TAG_MAINTHREAD	0x1
#define	THREAD_TAG_PTHREAD	0x10
#define	THREAD_TAG_WORKQUEUE	0x20
lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;

zone_t pthread_zone_workqueue;
zone_t pthread_zone_threadlist;
zone_t pthread_zone_threadreq;

extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern void workqueue_thread_yielded(void);
#define WQ_SETUP_FIRST_USE	1
#define WQ_SETUP_CLEAR_VOUCHER	2
static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
		struct threadlist *tl, int flags);

static void reset_priority(struct threadlist *tl, pthread_priority_t pri);
static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index);

static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2;

static bool workqueue_addnewthread(proc_t p, struct workqueue *wq);
static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use);
static void workqueue_lock_spin(struct workqueue *);
static void workqueue_unlock(struct workqueue *);
#define WQ_RUN_TR_THROTTLED		0
#define WQ_RUN_TR_THREAD_NEEDED		1
#define WQ_RUN_TR_THREAD_STARTED	2
#define WQ_RUN_TR_EXITING		3
static int workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
		struct threadlist *tl, struct threadreq *req, bool may_add_new_thread);

static bool may_start_constrained_thread(struct workqueue *wq,
		uint32_t at_priclass, struct threadlist *tl, bool may_start_timer);

static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap);
static boolean_t wq_thread_is_busy(uint64_t cur_ts,
		_Atomic uint64_t *lastblocked_tsp);

int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
#define WQ_MAXPRI_MIN	0	/* low prio queue num */
#define WQ_MAXPRI_MAX	2	/* max prio queuenum */
#define WQ_PRI_NUM	3	/* number of prio work queues */

#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128

#define PTHREAD_T_OFFSET 0
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
#define PTHREAD_START_CUSTOM		0x01000000
#define PTHREAD_START_SETSCHED		0x02000000
#define PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_TSD_BASE_SET	0x10000000
#define PTHREAD_START_QOSCLASS_MASK	0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff
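/*
 * How these bits are consumed by _bsdthread_create() below: for a
 * PTHREAD_START_SETSCHED request the policy and importance are unpacked as
 *
 *     policy     = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
 *     importance = flags & PTHREAD_START_IMPORTANCE_MASK;
 *
 * while a PTHREAD_START_QOSCLASS request instead interprets the low 24 bits
 * (PTHREAD_START_QOSCLASS_MASK) as a pthread_priority_t.
 */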
#define SCHED_OTHER		POLICY_TIMESHARE
#define SCHED_FIFO		POLICY_FIFO
#define SCHED_RR		POLICY_RR

#define BASEPRI_DEFAULT	31
static uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
static uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
static uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
static uint32_t wq_max_threads			= WORKQUEUE_MAXTHREADS;
static uint32_t wq_max_constrained_threads	= WORKQUEUE_MAXTHREADS / 8;
static uint32_t wq_max_concurrency[WORKQUEUE_NUM_BUCKETS + 1]; // set to ncpus on load
SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_timer_interval_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_threads, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_constrained_threads, 0, "");
static int wq_kevent_test SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-");

static uint32_t wq_init_constrained_limit = 1;

uint32_t pthread_debug_tracing = 1;

SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &pthread_debug_tracing, 0, "")
/*
 *  +-----+-----+-----+-----+-----+-----+-----+
 *  | MT  | BG  | UT  | DE  | IN  | UN  | mgr |
 *  +-----+-----+-----+-----+-----+-----+-----+-----+
 *  | pri |  5  |  4  |  3  |  2  |  1  |  0  |  6  |
 *  | qos |  1  |  2  |  3  |  4  |  5  |  6  |  7  |
 *  +-----+-----+-----+-----+-----+-----+-----+-----+
 */
static inline uint32_t
_wq_bucket_to_thread_qos(int pri)
{
	if (pri == WORKQUEUE_EVENT_MANAGER_BUCKET) {
		return WORKQUEUE_EVENT_MANAGER_BUCKET + 1;
	}
	return WORKQUEUE_EVENT_MANAGER_BUCKET - pri;
}
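/*
 * Worked example of the table above: bucket/pri 0 (UN) is the highest
 * non-manager QoS and maps to thread QoS 6, pri 5 (MT) maps to QoS 1, and the
 * manager bucket (WORKQUEUE_EVENT_MANAGER_BUCKET) maps to QoS 7.
 */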
#pragma mark wq_thactive

#if defined(__LP64__)
// 7 * 16 bits for each QoS bucket request count (including manager)
// 3 bits of best QoS among all pending constrained requests
#define WQ_THACTIVE_BUCKET_WIDTH 16
#define WQ_THACTIVE_QOS_SHIFT    (7 * WQ_THACTIVE_BUCKET_WIDTH)
#else
// 6 * 10 bits for each QoS bucket request count (except manager)
// 1 bit for the manager bucket
// 3 bits of best QoS among all pending constrained requests
#define WQ_THACTIVE_BUCKET_WIDTH 10
#define WQ_THACTIVE_QOS_SHIFT    (6 * WQ_THACTIVE_BUCKET_WIDTH + 1)
#endif
#define WQ_THACTIVE_BUCKET_MASK  ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)
#define WQ_THACTIVE_BUCKET_HALF  (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1))
#define WQ_THACTIVE_NO_PENDING_REQUEST 6

_Static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3,
		"Make sure we have space to encode a QoS");
static inline wq_thactive_t
_wq_thactive_fetch_and_add(struct workqueue *wq, wq_thactive_t offset)
{
#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
	return atomic_fetch_add_explicit(&wq->wq_thactive, offset,
			memory_order_relaxed);
#else
	return pthread_kern->atomic_fetch_add_128_relaxed(&wq->wq_thactive, offset);
#endif
}

static inline wq_thactive_t
_wq_thactive(struct workqueue *wq)
{
#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
	return atomic_load_explicit(&wq->wq_thactive, memory_order_relaxed);
#else
	return pthread_kern->atomic_load_128_relaxed(&wq->wq_thactive);
#endif
}
#define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \
		((tha) >> WQ_THACTIVE_QOS_SHIFT)

static inline uint32_t
_wq_thactive_best_constrained_req_qos(struct workqueue *wq)
{
	// Avoid expensive atomic operations: the three bits we're loading are in
	// a single byte, and always updated under the workqueue lock
	wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive;
	return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v);
}

static inline wq_thactive_t
_wq_thactive_set_best_constrained_req_qos(struct workqueue *wq,
		uint32_t orig_qos, uint32_t new_qos)
{
	wq_thactive_t v;
	v = (wq_thactive_t)(new_qos - orig_qos) << WQ_THACTIVE_QOS_SHIFT;
	/*
	 * We can do an atomic add relative to the initial load because updates
	 * to this qos are always serialized under the workqueue lock.
	 */
	return _wq_thactive_fetch_and_add(wq, v) + v;
}
static inline wq_thactive_t
_wq_thactive_offset_for_qos(int qos)
{
	return (wq_thactive_t)1 << (qos * WQ_THACTIVE_BUCKET_WIDTH);
}

static inline wq_thactive_t
_wq_thactive_inc(struct workqueue *wq, int qos)
{
	return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(qos));
}

static inline wq_thactive_t
_wq_thactive_dec(struct workqueue *wq, int qos)
{
	return _wq_thactive_fetch_and_add(wq, -_wq_thactive_offset_for_qos(qos));
}

static inline wq_thactive_t
_wq_thactive_move(struct workqueue *wq, int oldqos, int newqos)
{
	return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(newqos) -
			_wq_thactive_offset_for_qos(oldqos));
}
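/*
 * Note: _wq_thactive_move() folds a _wq_thactive_dec() of the old bucket and
 * a _wq_thactive_inc() of the new one into a single atomic add, so observers
 * never see the two bucket counters transiently out of step.
 */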
static inline uint32_t
_wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v,
		int qos, uint32_t *busycount, uint32_t *max_busycount)
{
	uint32_t count = 0, active;
	uint64_t curtime;

#ifndef __LP64__
	/*
	 * on 32bits the manager bucket is a single bit and the best constrained
	 * request QoS 3 bits are where the 10 bits of a regular QoS bucket count
	 * would be. Mask them out.
	 */
	v &= ~(~0ull << WQ_THACTIVE_QOS_SHIFT);
#endif
	if (busycount) {
		curtime = mach_absolute_time();
		*busycount = 0;
	}
	if (max_busycount) {
		*max_busycount = qos + 1;
	}
	for (int i = 0; i <= qos; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) {
		active = v & WQ_THACTIVE_BUCKET_MASK;
		count += active;
		if (busycount && wq->wq_thscheduled_count[i] > active) {
			if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) {
				/*
				 * We only consider the last blocked thread for a given bucket
				 * as busy because we don't want to take the list lock in each
				 * sched callback. However this is an approximation that could
				 * contribute to thread creation storms.
				 */
				(*busycount)++;
			}
		}
	}
	return count;
}
#pragma mark - Process/Thread Setup/Teardown syscalls

static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);

	// We can't safely take random values % something unless it's a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif
	return stackaddr;
}
/**
 * bsdthread_create system call.  Used by pthread_create.
 */
int
_bsdthread_create(struct proc *p,
		user_addr_t user_func, user_addr_t user_funcarg,
		user_addr_t user_stack, user_addr_t user_pthread,
		uint32_t flags, user_addr_t *retval)
{
	kern_return_t kret;
	void * sright;
	int error = 0;
	int allocated = 0;
	mach_vm_offset_t stackaddr;
	mach_vm_size_t th_allocsize = 0;
	mach_vm_size_t th_guardsize;
	mach_vm_offset_t th_stack;
	mach_vm_offset_t th_pthread;
	mach_vm_offset_t th_tsd_base;
	mach_port_name_t th_thport;
	thread_t th;
	vm_map_t vmap = pthread_kern->current_map();
	task_t ctask = current_task();
	unsigned int policy, importance;
	uint32_t tsd_offset;
	int isLP64 = 0;

	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);

	isLP64 = proc_is64bit(p);
	th_guardsize = vm_map_page_size(vmap);

	stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
	kret = pthread_kern->thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	thread_reference(th);

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);

	sright = (void *)pthread_kern->convert_thread_to_port(th);
	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
	if (!MACH_PORT_VALID(th_thport)) {
		error = EMFILE; // userland will convert this into a crash
		goto out;
	}
	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		mach_vm_size_t pthread_size =
			vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap));
		th_allocsize = th_guardsize + user_stack + pthread_size;
		user_stack += PTHREAD_T_OFFSET;

		kret = mach_vm_map(vmap, &stackaddr,
				th_allocsize, page_size - 1,
				VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
				VM_INHERIT_DEFAULT);
		if (kret != KERN_SUCCESS){
			kret = mach_vm_allocate(vmap,
					&stackaddr, th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
		}
		if (kret != KERN_SUCCESS) {
			error = ENOMEM;
			goto out;
		}

		PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

		allocated = 1;
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS) {
			error = ENOMEM;
			goto out1;
		}

		th_pthread = stackaddr + th_guardsize + user_stack;
		th_stack = th_pthread;

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
				vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){
			vm_fault(vmap,
					vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
					VM_PROT_READ | VM_PROT_WRITE,
					FALSE,
					THREAD_UNINT, NULL, 0);
		}

		vm_fault(vmap,
				vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)),
				VM_PROT_READ | VM_PROT_WRITE,
				FALSE,
				THREAD_UNINT, NULL, 0);

	} else {
		th_stack = user_stack;
		th_pthread = user_pthread;

		PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
	}
	tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
	if (tsd_offset) {
		th_tsd_base = th_pthread + tsd_offset;
		kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
		if (kret == KERN_SUCCESS) {
			flags |= PTHREAD_START_TSD_BASE_SET;
		}
	}

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Set up i386 registers & function call.
	 */
	if (isLP64 == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)pthread_kern->proc_get_threadstart(p),
			.eax = (unsigned int)th_pthread,
			.ebx = (unsigned int)th_thport,
			.ecx = (unsigned int)user_func,
			.edx = (unsigned int)user_funcarg,
			.edi = (unsigned int)user_stack,
			.esi = (unsigned int)flags,
			/*
			 * set stack pointer
			 */
			.esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
		};

		error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	} else {
		x86_thread_state64_t state64 = {
			.rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.rdi = (uint64_t)th_pthread,
			.rsi = (uint64_t)(th_thport),
			.rdx = (uint64_t)user_func,
			.rcx = (uint64_t)user_funcarg,
			.r8 = (uint64_t)user_stack,
			.r9 = (uint64_t)flags,
			/*
			 * set stack pointer aligned to 16 byte boundary
			 */
			.rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN)
		};

		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	}
#elif defined(__arm__)
	arm_thread_state_t state = {
		.pc = (int)pthread_kern->proc_get_threadstart(p),
		.r[0] = (unsigned int)th_pthread,
		.r[1] = (unsigned int)th_thport,
		.r[2] = (unsigned int)user_func,
		.r[3] = (unsigned int)user_funcarg,
		.r[4] = (unsigned int)user_stack,
		.r[5] = (unsigned int)flags,

		/* Set r7 & lr to 0 for better back tracing */
		.r[7] = 0,
		.lr = 0,

		/*
		 * set stack pointer
		 */
		.sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
	};

	(void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
#else
#error bsdthread_create not defined for this architecture
#endif
	if ((flags & PTHREAD_START_SETSCHED) != 0) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
		/* Set thread QoS class if requested. */
		pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);

		thread_qos_policy_data_t qos;
		qos.qos_tier = pthread_priority_get_thread_qos(priority);
		qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
				_pthread_priority_get_relpri(priority);

		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	}
	if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
		uint64_t mach_thread_self_offset =
				pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
		if (mach_thread_self_offset && tsd_offset) {
			bool proc64bit = proc_is64bit(p);
			if (proc64bit) {
				uint64_t th_thport_tsd = (uint64_t)th_thport;
				error = copyout(&th_thport_tsd, th_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			} else {
				uint32_t th_thport_tsd = (uint32_t)th_thport;
				error = copyout(&th_thport_tsd, th_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			}
			if (error) {
				goto out1;
			}
		}
	}

	kret = pthread_kern->thread_resume(th);
	if (kret != KERN_SUCCESS) {
		error = EINVAL;
		goto out1;
	}
	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);

	// cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms
	*retval = (user_addr_t)th_pthread;

	return(0);
out1:
	if (allocated != 0) {
		(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
	}
out:
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}
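/*
 * Calling-convention summary for the state set up above: the new thread
 * enters at proc_get_threadstart() (the userspace pthread entry point) with
 * the pthread_t, its own thread port, the user function, the function
 * argument, the stack value and the flags word in the first six argument
 * registers (eax..esi on 32-bit x86), and the stack pointer placed just below
 * th_stack as computed per-architecture.
 */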
/**
 * bsdthread_terminate system call.  Used by pthread_terminate
 */
int
_bsdthread_terminate(__unused struct proc *p,
		user_addr_t stackaddr,
		size_t size,
		uint32_t kthport,
		uint32_t sem,
		__unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	thread_t th = current_thread();

	freeaddr = (mach_vm_offset_t)stackaddr;
	freesize = size;

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
			vm_map_t user_map = pthread_kern->current_map();
			freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
			kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
			kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
		} else {
			kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
			if (kret != KERN_SUCCESS) {
				PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
				return(EINVAL);
			}
		}
	}

	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	if (sem != MACH_PORT_NULL) {
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
			return(EINVAL);
		}
	}

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
	}

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	panic("bsdthread_terminate: still running\n");

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);

	return(0);
}
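/*
 * Note the asymmetry above: a main thread's stack is not deallocated on
 * terminate but is instead marked VM_BEHAVIOR_REUSABLE and protected
 * VM_PROT_NONE, while ordinary pthread stacks are simply
 * mach_vm_deallocate()d.
 */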
/**
 * bsdthread_register system call.  Performs per-process setup.  Responsible for
 * returning capability bits to userspace and receiving userspace function addresses.
 */
int
_bsdthread_register(struct proc *p,
		user_addr_t threadstart,
		user_addr_t wqthread,
		int pthsize,
		user_addr_t pthread_init_data,
		user_addr_t pthread_init_data_size,
		uint64_t dispatchqueue_offset,
		int32_t *retval)
{
	struct _pthread_registration_data data = {};
	uint32_t max_tsd_offset;
	kern_return_t kr;
	size_t pthread_init_sz = 0;

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	/*
	 * if we have pthread_init_data, then we use that and target_concptr
	 * (which is an offset) to get data.
	 */
	if (pthread_init_data != 0) {
		if (pthread_init_data_size < sizeof(data.version)) {
			return EINVAL;
		}

		pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
		int ret = copyin(pthread_init_data, &data, pthread_init_sz);
		if (ret) {
			return ret;
		}
		if (data.version != (size_t)pthread_init_data_size) {
			return EINVAL;
		}
	} else {
		data.dispatch_queue_offset = dispatchqueue_offset;
	}

	/* We have to do this before proc_get_register so that it resets after fork */
	mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
	pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {
		return(EINVAL);
	}

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);
	uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
	if ((uint32_t)pthsize >= tsd_slot_sz &&
			data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
		max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
	} else {
		data.tsd_offset = 0;
		max_tsd_offset = 0;
	}
	pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

	if (data.dispatch_queue_offset > max_tsd_offset) {
		data.dispatch_queue_offset = 0;
	}
	pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

	if (pthread_kern->proc_set_return_to_kernel_offset) {
		if (data.return_to_kernel_offset > max_tsd_offset) {
			data.return_to_kernel_offset = 0;
		}
		pthread_kern->proc_set_return_to_kernel_offset(p,
				data.return_to_kernel_offset);
	}

	if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
		if (data.mach_thread_self_offset > max_tsd_offset) {
			data.mach_thread_self_offset = 0;
		}
		pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
				data.mach_thread_self_offset);
	}

	if (pthread_init_data != 0) {
		/* Outgoing data that userspace expects as a reply */
		data.version = sizeof(struct _pthread_registration_data);
		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			thread_qos_policy_data_t qos;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/* Unspecified threads means the kernel wants us to impose legacy upon the thread. */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
			}

			if (kr == KERN_SUCCESS) {
				data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier);
			} else {
				data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
			}
		} else {
			data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
		}

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {
			return EINVAL;
		}
	}

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;

	return(0);
}
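/*
 * The pthread_init_data handshake above is versioned: userspace passes the
 * size of its _pthread_registration_data as pthread_init_data_size, the
 * kernel copies in at most sizeof(data) bytes and rejects a mismatched
 * data.version, then copies the same number of bytes back out with the
 * kernel's version and the main thread's QoS filled in.
 */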
#pragma mark - QoS Manipulation

int
_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
{
	int rv;
	thread_t th;

	pthread_priority_t priority;

	/* Unused parameters must be zero. */
	if (arg3 != 0) {
		return EINVAL;
	}

	/* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
	if (proc_is64bit(p)) {
		uint64_t v;
		rv = copyin(tsd_priority_addr, &v, sizeof(v));
		if (rv) goto out;
		priority = (int)(v & 0xffffffff);
	} else {
		uint32_t v;
		rv = copyin(tsd_priority_addr, &v, sizeof(v));
		if (rv) goto out;
		priority = v;
	}

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
		return ESRCH;
	}

	/* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
	if (th != current_thread()) {
		thread_deallocate(th);
		return EPERM;
	}

	rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);

	/* Static param the thread, we just set QoS on it, so its stuck in QoS land now. */
	/* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details

	thread_deallocate(th);

out:
	return rv;
}
static inline struct threadlist *
util_get_thread_threadlist_entry(thread_t th)
{
	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	if (uth) {
		struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
		return tl;
	}
	return NULL;
}
boolean_t
_workq_thread_has_been_unbound(thread_t th, int qos_class)
{
	struct threadlist *tl = util_get_thread_threadlist_entry(th);
	if (!tl) {
		return FALSE;
	}

	struct workqueue *wq = tl->th_workq;
	workqueue_lock_spin(wq);

	if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
		goto fail;
	} else if (qos_class != class_index_get_thread_qos(tl->th_priority)) {
		goto fail;
	}

	if ((tl->th_flags & TH_LIST_KEVENT_BOUND)){
		goto fail;
	}
	tl->th_flags &= ~TH_LIST_KEVENT_BOUND;

	workqueue_unlock(wq);
	return TRUE;

fail:
	workqueue_unlock(wq);
	return FALSE;
}
int
_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
{
	thread_qos_policy_data_t qos;
	mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
	boolean_t gd = FALSE;
	thread_t th = current_thread();
	struct workqueue *wq = NULL;
	struct threadlist *tl = NULL;

	kern_return_t kr;
	int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;

	if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) {
		tl = util_get_thread_threadlist_entry(th);
		if (tl) {
			wq = tl->th_workq;
		} else {
			goto qos;
		}

		workqueue_lock_spin(wq);
		if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
			tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
			unsigned int kevent_flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_UNBIND_CHECK_FLAGS;
			if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
				kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
			}

			workqueue_unlock(wq);
			__assert_only int ret = kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
			assert(ret == 0);
		} else {
			workqueue_unlock(wq);
		}
	}

qos:
	if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
		kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
		if (kr != KERN_SUCCESS) {
			qos_rv = EINVAL;
			goto voucher;
		}

		/*
		 * If we have main-thread QoS then we don't allow a thread to come out
		 * of QOS_CLASS_UNSPECIFIED.
		 */
		if (pthread_kern->qos_main_thread_active() && qos.qos_tier ==
				THREAD_QOS_UNSPECIFIED) {
			qos_rv = EPERM;
			goto voucher;
		}

		if (!tl) {
			tl = util_get_thread_threadlist_entry(th);
			if (tl) wq = tl->th_workq;
		}

		PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);

		qos.qos_tier = pthread_priority_get_thread_qos(priority);
		qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);

		if (qos.qos_tier == QOS_CLASS_UNSPECIFIED ||
				qos.tier_importance > 0 || qos.tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			qos_rv = EINVAL;
			goto voucher;
		}

		/*
		 * If we're a workqueue, the threadlist item priority needs adjusting,
		 * along with the bucket we were running in.
		 */
		if (tl) {
			bool try_run_threadreq = false;

			workqueue_lock_spin(wq);
			kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance);
			assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED);

			/* Fix up counters. */
			uint8_t old_bucket = tl->th_priority;
			uint8_t new_bucket = pthread_priority_get_class_index(priority);

			if (old_bucket != new_bucket) {
				_wq_thactive_move(wq, old_bucket, new_bucket);
				wq->wq_thscheduled_count[old_bucket]--;
				wq->wq_thscheduled_count[new_bucket]++;
				if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET ||
						old_bucket < new_bucket) {
					/*
					 * if the QoS of the thread was lowered, then this could
					 * allow for a higher QoS thread request to run, so we need
					 * to reevaluate.
					 */
					try_run_threadreq = true;
				}
				tl->th_priority = new_bucket;
			}

			bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED);
			bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
			if (!old_overcommit && new_overcommit) {
				if (wq->wq_constrained_threads_scheduled-- ==
						wq_max_constrained_threads) {
					try_run_threadreq = true;
				}
				tl->th_flags &= ~TH_LIST_CONSTRAINED;
			} else if (old_overcommit && !new_overcommit) {
				wq->wq_constrained_threads_scheduled++;
				tl->th_flags |= TH_LIST_CONSTRAINED;
			}

			if (try_run_threadreq) {
				workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
			} else {
				workqueue_unlock(wq);
			}
		}
		kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
		if (kr != KERN_SUCCESS) {
			qos_rv = EINVAL;
		}

		PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
	}

voucher:
	if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
		kr = pthread_kern->thread_set_voucher_name(voucher);
		if (kr != KERN_SUCCESS) {
			voucher_rv = ENOENT;
			goto fixedpri;
		}
	}

fixedpri:
	if (qos_rv) goto done;
	if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
		thread_extended_policy_data_t extpol = {.timeshare = 0};

		if (!tl) tl = util_get_thread_threadlist_entry(th);
		if (tl) {
			/* Not allowed on workqueue threads */
			fixedpri_rv = ENOTSUP;
			goto done;
		}

		kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
		if (kr != KERN_SUCCESS) {
			fixedpri_rv = EINVAL;
			goto done;
		}
	} else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) {
		thread_extended_policy_data_t extpol = {.timeshare = 1};

		if (!tl) tl = util_get_thread_threadlist_entry(th);
		if (tl) {
			/* Not allowed on workqueue threads */
			fixedpri_rv = ENOTSUP;
			goto done;
		}

		kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
		if (kr != KERN_SUCCESS) {
			fixedpri_rv = EINVAL;
			goto done;
		}
	}

done:
	if (qos_rv && voucher_rv) {
		/* Both failed, give that a unique error. */
		return EBADMSG;
	}

	if (qos_rv) {
		return qos_rv;
	}

	if (voucher_rv) {
		return voucher_rv;
	}

	if (fixedpri_rv) {
		return fixedpri_rv;
	}

	return 0;
}
int
_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
{
	thread_t th;
	int rv = 0;

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
		return ESRCH;
	}

	int override_qos = pthread_priority_get_thread_qos(priority);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);
	if (tl) {
		PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
	}

	/* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. */
	pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
			resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL);

	thread_deallocate(th);
	return rv;
}
int
_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval)
{
	thread_t th;
	int rv = 0;

	if (arg3 != 0) {
		return EINVAL;
	}

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
		return ESRCH;
	}

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);
	if (tl) {
		PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0);
	}

	pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);

	thread_deallocate(th);
	return rv;
}
static int
_bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr)
{
	thread_t th;
	int rv = 0;

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
		return ESRCH;
	}

	int override_qos = pthread_priority_get_thread_qos(priority);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);
	if (!tl) {
		thread_deallocate(th);
		return EPERM;
	}

	PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);

	rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
			resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport);

	thread_deallocate(th);
	return rv;
}
int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd,
		mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
{
	return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL);
}

int
_bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval)
{
	return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr);
}
int
_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
{
	if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
		return EINVAL;
	}

	return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval);
}
int
_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval)
{
	if ((reset_all && (resource != 0)) || arg3 != 0) {
		return EINVAL;
	}

	thread_t th = current_thread();
	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);

	if (!tl) {
		return EPERM;
	}

	PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0);

	resource = reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource;
	pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);

	return 0;
}
int
_bsdthread_ctl_max_parallelism(struct proc __unused *p, user_addr_t __unused cmd,
		int qos, unsigned long flags, int *retval)
{
	_Static_assert(QOS_PARALLELISM_COUNT_LOGICAL ==
			_PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical");
	_Static_assert(QOS_PARALLELISM_REALTIME ==
			_PTHREAD_QOS_PARALLELISM_REALTIME, "realtime");

	if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) {
		return EINVAL;
	}

	if (flags & QOS_PARALLELISM_REALTIME) {
		if (qos) {
			return EINVAL;
		}
	} else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) {
		return EINVAL;
	}

	*retval = pthread_kern->qos_max_parallelism(qos, flags);

	return 0;
}
int
_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
{
	switch (cmd) {
	case BSDTHREAD_CTL_SET_QOS:
		return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_START:
		return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_END:
		return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
		return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
		return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
		return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
		return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_SET_SELF:
		return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
	case BSDTHREAD_CTL_QOS_MAX_PARALLELISM:
		return _bsdthread_ctl_max_parallelism(p, cmd, (int)arg1, (unsigned long)arg2, retval);
	default:
		return EINVAL;
	}
}
#pragma mark - Workqueue Implementation

#pragma mark wq_flags

static inline uint32_t
_wq_flags(struct workqueue *wq)
{
	return atomic_load_explicit(&wq->wq_flags, memory_order_relaxed);
}

static inline bool
_wq_exiting(struct workqueue *wq)
{
	return _wq_flags(wq) & WQ_EXITING;
}

static inline uint32_t
_wq_flags_or_orig(struct workqueue *wq, uint32_t v)
{
#if PTHREAD_INLINE_RMW_ATOMICS
	uint32_t state;
	do {
		state = _wq_flags(wq);
	} while (!OSCompareAndSwap(state, state | v, &wq->wq_flags));
	return state;
#else
	return atomic_fetch_or_explicit(&wq->wq_flags, v, memory_order_relaxed);
#endif
}

static inline uint32_t
_wq_flags_and_orig(struct workqueue *wq, uint32_t v)
{
#if PTHREAD_INLINE_RMW_ATOMICS
	uint32_t state;
	do {
		state = _wq_flags(wq);
	} while (!OSCompareAndSwap(state, state & v, &wq->wq_flags));
	return state;
#else
	return atomic_fetch_and_explicit(&wq->wq_flags, v, memory_order_relaxed);
#endif
}
static bool
WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq)
{
	uint32_t oldflags, newflags;
	do {
		oldflags = _wq_flags(wq);
		if (oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING)) {
			return false;
		}
		newflags = oldflags | WQ_ATIMER_DELAYED_RUNNING;
	} while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
	return true;
}

static bool
WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq)
{
	uint32_t oldflags, newflags;
	do {
		oldflags = _wq_flags(wq);
		if (oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING)) {
			return false;
		}
		newflags = oldflags | WQ_ATIMER_IMMEDIATE_RUNNING;
	} while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
	return true;
}
#pragma mark thread requests pacing

static inline uint32_t
_wq_pacing_shift_for_pri(int pri)
{
	return _wq_bucket_to_thread_qos(pri) - 1;
}

static inline int
_wq_highest_paced_priority(struct workqueue *wq)
{
	uint8_t paced = wq->wq_paced;
	int msb = paced ? 32 - __builtin_clz(paced) : 0; // fls(paced) == bit + 1
	return WORKQUEUE_EVENT_MANAGER_BUCKET - msb;
}

static inline uint8_t
_wq_pacing_bit_for_pri(int pri)
{
	return 1u << _wq_pacing_shift_for_pri(pri);
}

static inline bool
_wq_should_pace_priority(struct workqueue *wq, int pri)
{
	return wq->wq_paced >= _wq_pacing_bit_for_pri(pri);
}

static inline void
_wq_pacing_start(struct workqueue *wq, struct threadlist *tl)
{
	uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
	assert((tl->th_flags & TH_LIST_PACING) == 0);
	assert((wq->wq_paced & bit) == 0);
	wq->wq_paced |= bit;
	tl->th_flags |= TH_LIST_PACING;
}

static inline bool
_wq_pacing_end(struct workqueue *wq, struct threadlist *tl)
{
	if (tl->th_flags & TH_LIST_PACING) {
		uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
		assert((wq->wq_paced & bit) != 0);
		wq->wq_paced ^= bit;
		tl->th_flags &= ~TH_LIST_PACING;
		return wq->wq_paced < bit; // !_wq_should_pace_priority
	}
	return false;
}
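/*
 * A short note on the pacing bits above: wq_paced keeps one bit per
 * non-manager bucket, with higher-QoS buckets occupying higher bits
 * (bit = 1 << (_wq_bucket_to_thread_qos(pri) - 1)). A priority is "paced"
 * while any bit at or above its own is set, which is what the
 * wq->wq_paced >= bit comparison in _wq_should_pace_priority() checks.
 */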
#pragma mark thread requests

static void
_threadreq_init_alloced(struct threadreq *req, int priority, int flags)
{
	assert((flags & TR_FLAG_ONSTACK) == 0);
	req->tr_state = TR_STATE_NEW;
	req->tr_priority = priority;
	req->tr_flags = flags;
}

static void
_threadreq_init_stack(struct threadreq *req, int priority, int flags)
{
	req->tr_state = TR_STATE_NEW;
	req->tr_priority = priority;
	req->tr_flags = flags | TR_FLAG_ONSTACK;
}

static void
_threadreq_copy_prepare(struct workqueue *wq)
{
again:
	if (wq->wq_cached_threadreq) {
		return;
	}

	workqueue_unlock(wq);
	struct threadreq *req = zalloc(pthread_zone_threadreq);
	workqueue_lock_spin(wq);

	if (wq->wq_cached_threadreq) {
		/*
		 * We lost the race and someone left behind an extra threadreq for us
		 * to use. Throw away our request and retry.
		 */
		workqueue_unlock(wq);
		zfree(pthread_zone_threadreq, req);
		workqueue_lock_spin(wq);
		goto again;
	} else {
		wq->wq_cached_threadreq = req;
	}

	assert(wq->wq_cached_threadreq);
}

static bool
_threadreq_copy_prepare_noblock(struct workqueue *wq)
{
	if (wq->wq_cached_threadreq) {
		return true;
	}

	wq->wq_cached_threadreq = zalloc_noblock(pthread_zone_threadreq);

	return wq->wq_cached_threadreq != NULL;
}
static inline struct threadreq_head *
_threadreq_list_for_req(struct workqueue *wq, const struct threadreq *req)
{
	if (req->tr_flags & TR_FLAG_OVERCOMMIT) {
		return &wq->wq_overcommit_reqlist[req->tr_priority];
	} else {
		return &wq->wq_reqlist[req->tr_priority];
	}
}

static void
_threadreq_enqueue(struct workqueue *wq, struct threadreq *req)
{
	assert(req && req->tr_state == TR_STATE_NEW);
	if (req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
		assert(wq->wq_event_manager_threadreq.tr_state != TR_STATE_WAITING);
		memcpy(&wq->wq_event_manager_threadreq, req, sizeof(struct threadreq));
		req = &wq->wq_event_manager_threadreq;
		req->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING);
	} else {
		if (req->tr_flags & TR_FLAG_ONSTACK) {
			assert(wq->wq_cached_threadreq);
			struct threadreq *newreq = wq->wq_cached_threadreq;
			wq->wq_cached_threadreq = NULL;

			memcpy(newreq, req, sizeof(struct threadreq));
			newreq->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING);
			req->tr_state = TR_STATE_DEAD;
			req = newreq;
		}
		TAILQ_INSERT_TAIL(_threadreq_list_for_req(wq, req), req, tr_entry);
	}
	req->tr_state = TR_STATE_WAITING;
	wq->wq_reqcount++;
}

static void
_threadreq_dequeue(struct workqueue *wq, struct threadreq *req)
{
	if (req->tr_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) {
		struct threadreq_head *req_list = _threadreq_list_for_req(wq, req);
		struct threadreq *cursor = NULL;
		TAILQ_FOREACH(cursor, req_list, tr_entry) {
			if (cursor == req) break;
		}
		assert(cursor == req);
		TAILQ_REMOVE(req_list, req, tr_entry);
	}
	wq->wq_reqcount--;
}
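/*
 * Note on TR_FLAG_ONSTACK requests above: callers may build a threadreq on
 * their own stack. _threadreq_enqueue() copies a manager-priority request
 * into the singleton wq_event_manager_threadreq; for other buckets an
 * on-stack request is copied into the pre-allocated wq_cached_threadreq
 * before being linked on a request list, and the caller's copy is marked
 * TR_STATE_DEAD.
 */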
/*
 * Mark a thread request as complete. At this point, it is treated as owned by
 * the submitting subsystem and you should assume it could be freed.
 *
 * Called with the workqueue lock held.
 */
static int
_threadreq_complete_and_unlock(proc_t p, struct workqueue *wq,
		struct threadreq *req, struct threadlist *tl)
{
	struct threadreq *req_tofree = NULL;
	bool sync = (req->tr_state == TR_STATE_NEW);
	bool workloop = req->tr_flags & TR_FLAG_WORKLOOP;
	bool onstack = req->tr_flags & TR_FLAG_ONSTACK;
	bool kevent = req->tr_flags & TR_FLAG_KEVENT;
	bool unbinding = tl->th_flags & TH_LIST_UNBINDING;
	bool locked = true;
	bool waking_parked_thread = (tl->th_flags & TH_LIST_BUSY);
	int ret;

	req->tr_state = TR_STATE_COMPLETE;

	if (!workloop && !onstack && req != &wq->wq_event_manager_threadreq) {
		if (wq->wq_cached_threadreq) {
			req_tofree = req;
		} else {
			wq->wq_cached_threadreq = req;
		}
	}

	if (tl->th_flags & TH_LIST_UNBINDING) {
		tl->th_flags &= ~TH_LIST_UNBINDING;
		assert((tl->th_flags & TH_LIST_KEVENT_BOUND));
	} else if (workloop || kevent) {
		assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
		tl->th_flags |= TH_LIST_KEVENT_BOUND;
	}

	if (workloop) {
		workqueue_unlock(wq);
		ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req,
				tl->th_thread, sync ? WORKLOOP_FULFILL_THREADREQ_SYNC : 0);
		assert(ret == 0);
		locked = false;
	} else if (kevent) {
		unsigned int kevent_flags = KEVENT_FLAG_WORKQ;
		if (sync) {
			kevent_flags |= KEVENT_FLAG_SYNCHRONOUS_BIND;
		}
		if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
			kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
		}
		workqueue_unlock(wq);
		ret = kevent_qos_internal_bind(wq->wq_proc,
				class_index_get_thread_qos(tl->th_priority), tl->th_thread,
				kevent_flags);
		if (ret != 0) {
			workqueue_lock_spin(wq);
			tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
			locked = true;
		} else {
			locked = false;
		}
	}

	/*
	 * Run Thread, Run!
	 */
	PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 0, 0, 0, 0);
	PTHREAD_TRACE_WQ_REQ(TRACE_wq_runitem | DBG_FUNC_START, wq, req, tl->th_priority,
			thread_tid(current_thread()), thread_tid(tl->th_thread));

	if (waking_parked_thread) {
		if (!locked) {
			workqueue_lock_spin(wq);
		}
		tl->th_flags &= ~(TH_LIST_BUSY);
		if ((tl->th_flags & TH_LIST_REMOVING_VOUCHER) == 0) {
			/*
			 * If the thread is in the process of removing its voucher, then it
			 * isn't actually in the wait event yet and we don't need to wake
			 * it up. Save the trouble (and potential lock-ordering issues
			 * of providing a wakeup it doesn't need).
			 */
			thread_wakeup_thread(tl, tl->th_thread);
		}
		workqueue_unlock(wq);

		if (req_tofree) zfree(pthread_zone_threadreq, req_tofree);
		return WQ_RUN_TR_THREAD_STARTED;
	}

	assert ((tl->th_flags & TH_LIST_PACING) == 0);
	if (locked) {
		workqueue_unlock(wq);
	}
	if (req_tofree) zfree(pthread_zone_threadreq, req_tofree);
	if (unbinding) {
		return WQ_RUN_TR_THREAD_STARTED;
	}
	_setup_wqthread(p, tl->th_thread, wq, tl, WQ_SETUP_CLEAR_VOUCHER);
	pthread_kern->unix_syscall_return(EJUSTRETURN);
	__builtin_unreachable();
}
/*
 * Mark a thread request as cancelled. Has similar ownership semantics to the
 * complete call above.
 */
static void
_threadreq_cancel(struct workqueue *wq, struct threadreq *req)
{
	assert(req->tr_state == TR_STATE_WAITING);
	req->tr_state = TR_STATE_DEAD;

	assert((req->tr_flags & TR_FLAG_ONSTACK) == 0);
	if (req->tr_flags & TR_FLAG_WORKLOOP) {
		__assert_only int ret;
		ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req,
				THREAD_NULL, WORKLOOP_FULFILL_THREADREQ_CANCEL);
		assert(ret == 0 || ret == ECANCELED);
	} else if (req != &wq->wq_event_manager_threadreq) {
		zfree(pthread_zone_threadreq, req);
	}
}
#pragma mark workqueue lock

static boolean_t workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq) {
	return kdp_lck_spin_is_acquired(&wq->wq_lock);
}

static void
workqueue_lock_spin(struct workqueue *wq)
{
	assert(ml_get_interrupts_enabled() == TRUE);
	lck_spin_lock(&wq->wq_lock);
}

static bool
workqueue_lock_try(struct workqueue *wq)
{
	return lck_spin_try_lock(&wq->wq_lock);
}

static void
workqueue_unlock(struct workqueue *wq)
{
	lck_spin_unlock(&wq->wq_lock);
}
1753 * Sets up the timer which will call out to workqueue_add_timer
1756 workqueue_interval_timer_start(struct workqueue
*wq
)
1760 /* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the
1761 ATIMER_RUNNING flag is not present. The net effect here is that if a
1762 sequence of threads is required, we'll double the time before we give out
1764 if (wq
->wq_timer_interval
== 0) {
1765 wq
->wq_timer_interval
= wq_stalled_window_usecs
;
1768 wq
->wq_timer_interval
= wq
->wq_timer_interval
* 2;
1770 if (wq
->wq_timer_interval
> wq_max_timer_interval_usecs
) {
1771 wq
->wq_timer_interval
= wq_max_timer_interval_usecs
;
1774 clock_interval_to_deadline(wq
->wq_timer_interval
, 1000, &deadline
);
1776 PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer
, wq
, wq
->wq_reqcount
,
1777 _wq_flags(wq
), wq
->wq_timer_interval
, 0);
1779 thread_call_t call
= wq
->wq_atimer_delayed_call
;
1780 if (thread_call_enter1_delayed(call
, call
, deadline
)) {
1781 panic("delayed_call was already enqueued");
1786 * Immediately trigger the workqueue_add_timer
1789 workqueue_interval_timer_trigger(struct workqueue
*wq
)
1791 PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer
, wq
, wq
->wq_reqcount
,
1792 _wq_flags(wq
), 0, 0);
1794 thread_call_t call
= wq
->wq_atimer_immediate_call
;
1795 if (thread_call_enter1(call
, call
)) {
1796 panic("immediate_call was already enqueued");
/**
 * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts
 */
static boolean_t
wq_thread_is_busy(uint64_t cur_ts, _Atomic uint64_t *lastblocked_tsp)
{
	clock_sec_t	secs;
	clock_usec_t	usecs;
	uint64_t lastblocked_ts;
	uint64_t elapsed;

	lastblocked_ts = atomic_load_explicit(lastblocked_tsp, memory_order_relaxed);
	if (lastblocked_ts >= cur_ts) {
		/*
		 * because the update of the timestamp when a thread blocks isn't
		 * serialized against us looking at it (i.e. we don't hold the workq lock)
		 * it's possible to have a timestamp that matches the current time or
		 * that even looks to be in the future relative to when we grabbed the current
		 * time... just treat this as a busy thread since it must have just blocked.
		 */
		return (TRUE);
	}
	elapsed = cur_ts - lastblocked_ts;

	pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);

	return (secs == 0 && usecs < wq_stalled_window_usecs);
}
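/*
 * In other words, a bucket's last-blocked timestamp counts its thread as
 * "busy" only while it is less than wq_stalled_window_usecs old (or appears
 * to be in the future due to the unsynchronized store noted above); this is
 * the approximation _wq_thactive_aggregate_downto_qos() relies on when it
 * bumps *busycount.
 */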
/*
 * handler function for the timer
 */
static void
workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self)
{
	proc_t p = wq->wq_proc;

	workqueue_lock_spin(wq);

	PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq,
			_wq_flags(wq), wq->wq_nthreads, wq->wq_thidlecount, 0);

	/*
	 * There are two tricky issues here.
	 *
	 * First issue: we start the thread_call's that invoke this routine without
	 * the workqueue lock held. The scheduler callback needs to trigger
	 * reevaluation of the number of running threads but shouldn't take that
	 * lock, so we can't use it to synchronize state around the thread_call.
	 * As a result, it might re-enter the thread_call while this routine is
	 * already running. This could cause it to fire a second time and we'll
	 * have two add_timers running at once. Obviously, we don't want that to
	 * keep stacking, so we need to keep it at two timers.
	 *
	 * Solution: use wq_flags (accessed via atomic CAS) to synchronize the
	 * enqueue of the thread_call itself. When a thread needs to trigger the
	 * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets
	 * the flag then does a thread_call_enter. We'll then remove that flag
	 * only once we've got the lock and it's safe for the thread_call to be
	 * entered again.
	 *
	 * Second issue: we need to make sure that the two timers don't execute this
	 * routine concurrently. We can't use the workqueue lock for this because
	 * we'll need to drop it during our execution.
	 *
	 * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that
	 * we are currently executing the routine and the next thread should wait.
	 *
	 * After all that, we arrive at the following four possible states:
	 * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY    no pending timer, no active timer
	 * !WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY    no pending timer,  1 active timer
	 *  WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY     1 pending timer, no active timer
	 *  WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY     1 pending timer,  1 active timer
	 *
	 * A further complication: sometimes we need to trigger this function to run
	 * without delay. Because we aren't under a lock between setting
	 * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply
	 * re-enter the thread call: if thread_call_enter() returned false, we
	 * wouldn't be able to distinguish the case where the thread_call had
	 * already fired from the case where it hadn't been entered yet from the
	 * other thread. So, we use a separate thread_call for immediate
	 * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING.
	 */
	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
		wq->wq_lflags |= WQL_ATIMER_WAITING;

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(wq);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(wq);
	}

	/*
	 * Prevent _workqueue_mark_exiting() from going away
	 */
	wq->wq_lflags |= WQL_ATIMER_BUSY;

	/*
	 * Decide which timer we are and remove the RUNNING flag.
	 */
	if (thread_call_self == wq->wq_atimer_delayed_call) {
		uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
		if ((wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) {
			panic("workqueue_add_timer(delayed) w/o WQ_ATIMER_DELAYED_RUNNING");
		}
	} else if (thread_call_self == wq->wq_atimer_immediate_call) {
		uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
		if ((wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) {
			panic("workqueue_add_timer(immediate) w/o WQ_ATIMER_IMMEDIATE_RUNNING");
		}
	} else {
		panic("workqueue_add_timer can't figure out which timer it is");
	}

	int ret = WQ_RUN_TR_THREAD_STARTED;
	while (ret == WQ_RUN_TR_THREAD_STARTED && wq->wq_reqcount) {
		ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);

		workqueue_lock_spin(wq);
	}
	_threadreq_copy_prepare(wq);

	/*
	 * If we called WQ_TIMER_NEEDED above, then this flag will be set if that
	 * call marked the timer running. If so, we let the timer interval grow.
	 * Otherwise, we reset it back to 0.
	 */
	uint32_t wq_flags = _wq_flags(wq);
	if (!(wq_flags & WQ_ATIMER_DELAYED_RUNNING)) {
		wq->wq_timer_interval = 0;
	}

	wq->wq_lflags &= ~WQL_ATIMER_BUSY;

	if ((wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
		/*
		 * wakeup the thread hung up in _workqueue_mark_exiting or
		 * workqueue_add_timer waiting for this timer to finish getting out of
		 * the way
		 */
		wq->wq_lflags &= ~WQL_ATIMER_WAITING;
		wakeup(wq);
	}

	PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_unlock(wq);
}
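/*
 * Illustrative sketch (not part of the build) of the timer-arming protocol
 * described above: a thread only enqueues the thread_call when it is the one
 * that transitions the RUNNING bit from 0 to 1, and it never arms while the
 * workqueue is exiting.  The names below are simplified stand-ins for
 * wq_flags / _wq_flags_or_orig() / thread_call_enter():
 *
 *	#include <stdatomic.h>
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	#define F_EXITING         0x1
 *	#define F_DELAYED_RUNNING 0x2
 *
 *	// returns true if the caller must actually enqueue the thread_call
 *	static bool
 *	arm_delayed_timer(_Atomic uint32_t *flags)
 *	{
 *		uint32_t old = atomic_load_explicit(flags, memory_order_relaxed);
 *		do {
 *			if (old & (F_EXITING | F_DELAYED_RUNNING)) {
 *				return false;	// already armed, or shutting down
 *			}
 *		} while (!atomic_compare_exchange_weak(flags, &old,
 *				old | F_DELAYED_RUNNING));
 *		return true;		// we won the race: now do the thread_call_enter
 *	}
 */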
#pragma mark thread state tracking

// called by spinlock code when trying to yield to lock owner
void
_workqueue_thread_yielded(void)
{
}
1960 workqueue_callback(int type
, thread_t thread
)
1962 struct uthread
*uth
= pthread_kern
->get_bsdthread_info(thread
);
1963 struct threadlist
*tl
= pthread_kern
->uthread_get_threadlist(uth
);
1964 struct workqueue
*wq
= tl
->th_workq
;
1965 uint32_t old_count
, req_qos
, qos
= tl
->th_priority
;
1966 wq_thactive_t old_thactive
;
1969 case SCHED_CALL_BLOCK
: {
1970 bool start_timer
= false;
1972 old_thactive
= _wq_thactive_dec(wq
, tl
->th_priority
);
1973 req_qos
= WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive
);
1974 old_count
= _wq_thactive_aggregate_downto_qos(wq
, old_thactive
,
1977 if (old_count
== wq_max_concurrency
[tl
->th_priority
]) {
1979 * The number of active threads at this priority has fallen below
1980 * the maximum number of concurrent threads that are allowed to run
1982 * if we collide with another thread trying to update the
1983 * last_blocked (really unlikely since another thread would have to
1984 * get scheduled and then block after we start down this path), it's
1985 * not a problem. Either timestamp is adequate, so no need to retry
1987 atomic_store_explicit(&wq
->wq_lastblocked_ts
[qos
],
1988 mach_absolute_time(), memory_order_relaxed
);
1991 if (req_qos
== WORKQUEUE_EVENT_MANAGER_BUCKET
|| qos
> req_qos
) {
1993 * The blocking thread is at a lower QoS than the highest currently
1994 * pending constrained request, nothing has to be redriven
1997 uint32_t max_busycount
, old_req_count
;
1998 old_req_count
= _wq_thactive_aggregate_downto_qos(wq
, old_thactive
,
1999 req_qos
, NULL
, &max_busycount
);
2001 * If it is possible that may_start_constrained_thread had refused
2002 * admission due to being over the max concurrency, we may need to
2003 * spin up a new thread.
2005 * We take into account the maximum number of busy threads
2006 * that can affect may_start_constrained_thread as looking at the
2007 * actual number may_start_constrained_thread will see is racy.
2009 * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is
2010 * between NCPU (4) and NCPU - 2 (2) we need to redrive.
2012 if (wq_max_concurrency
[req_qos
] <= old_req_count
+ max_busycount
&&
2013 old_req_count
<= wq_max_concurrency
[req_qos
]) {
2014 if (WQ_TIMER_DELAYED_NEEDED(wq
)) {
2016 workqueue_interval_timer_start(wq
);
2021 PTHREAD_TRACE_WQ(TRACE_wq_thread_block
| DBG_FUNC_START
, wq
,
2022 old_count
- 1, qos
| (req_qos
<< 8),
2023 wq
->wq_reqcount
<< 1 | start_timer
, 0);
2026 case SCHED_CALL_UNBLOCK
: {
2028 * we cannot take the workqueue_lock here...
2029 * an UNBLOCK can occur from a timer event which
2030 * is run from an interrupt context... if the workqueue_lock
2031 * is already held by this processor, we'll deadlock...
2032 * the thread lock for the thread being UNBLOCKED
2035 old_thactive
= _wq_thactive_inc(wq
, qos
);
2036 if (pthread_debug_tracing
) {
2037 req_qos
= WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive
);
2038 old_count
= _wq_thactive_aggregate_downto_qos(wq
, old_thactive
,
2040 PTHREAD_TRACE_WQ(TRACE_wq_thread_block
| DBG_FUNC_END
, wq
,
2041 old_count
+ 1, qos
| (req_qos
<< 8),
2042 wq
->wq_threads_scheduled
, 0);
2050 _workqueue_get_sched_callback(void)
2052 return workqueue_callback
;
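/*
 * Worked example (illustrative only) of the redrive window tested in the
 * SCHED_CALL_BLOCK path above, assuming NCPU = 4 so that
 * wq_max_concurrency[req_qos] == 4 and max_busycount == 2:
 *
 *	redrive iff  4 <= old_req_count + 2  &&  old_req_count <= 4
 *	        iff  2 <= old_req_count <= 4
 *
 * i.e. the timer is only redriven when the pre-block thread count at or above
 * the requested QoS was close enough to the concurrency limit that
 * may_start_constrained_thread() could plausibly have refused admission.
 */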
#pragma mark thread addition/removal

static mach_vm_size_t
_workqueue_allocsize(struct workqueue *wq)
{
	proc_t p = wq->wq_proc;
	mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
	mach_vm_size_t pthread_size =
		vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
				vm_map_page_mask(wq->wq_map));

	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}
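/*
 * Worked example (illustrative, with assumed values): on a 16K-page map with
 * proc_get_pthsize(p) == 4096 and PTHREAD_T_OFFSET == 0, the allocation is
 *
 *	guardsize    = 16K                          (one VM page)
 *	pthread_size = round_page(4096 + 0) = 16K
 *	allocsize    = 16K + PTH_DEFAULT_STACKSIZE + 16K
 *
 * i.e. one guard page at the bottom, the default stack, and a page-rounded
 * region for the pthread_t (plus optional TSD offset) at the top.
 */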
 * pop goes the thread
 *
 * If fromexit is set, the call is from workqueue_exit(),
 * so some cleanups are to be avoided.
2074 workqueue_removethread(struct threadlist
*tl
, bool fromexit
, bool first_use
)
2076 struct uthread
* uth
;
2077 struct workqueue
* wq
= tl
->th_workq
;
2079 if (tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
){
2080 TAILQ_REMOVE(&wq
->wq_thidlemgrlist
, tl
, th_entry
);
2082 TAILQ_REMOVE(&wq
->wq_thidlelist
, tl
, th_entry
);
2085 if (fromexit
== 0) {
2086 assert(wq
->wq_nthreads
&& wq
->wq_thidlecount
);
2088 wq
->wq_thidlecount
--;
2092 * Clear the threadlist pointer in uthread so
2093 * blocked thread on wakeup for termination will
2094 * not access the thread list as it is going to be
2097 pthread_kern
->thread_sched_call(tl
->th_thread
, NULL
);
2099 uth
= pthread_kern
->get_bsdthread_info(tl
->th_thread
);
2100 if (uth
!= (struct uthread
*)0) {
2101 pthread_kern
->uthread_set_threadlist(uth
, NULL
);
2103 if (fromexit
== 0) {
2104 /* during exit the lock is not held */
2105 workqueue_unlock(wq
);
2108 if ( (tl
->th_flags
& TH_LIST_NEW
) || first_use
) {
2110 * thread was created, but never used...
2111 * need to clean up the stack and port ourselves
2112 * since we're not going to spin up through the
2113 * normal exit path triggered from Libc
2115 if (fromexit
== 0) {
2116 /* vm map is already deallocated when this is called from exit */
2117 (void)mach_vm_deallocate(wq
->wq_map
, tl
->th_stackaddr
, _workqueue_allocsize(wq
));
2119 (void)pthread_kern
->mach_port_deallocate(pthread_kern
->task_get_ipcspace(wq
->wq_task
), tl
->th_thport
);
2122 * drop our ref on the thread
2124 thread_deallocate(tl
->th_thread
);
2126 zfree(pthread_zone_threadlist
, tl
);
2131 * Try to add a new workqueue thread.
2133 * - called with workq lock held
2134 * - dropped and retaken around thread creation
2135 * - return with workq lock held
2138 workqueue_addnewthread(proc_t p
, struct workqueue
*wq
)
2144 workqueue_unlock(wq
);
2146 struct threadlist
*tl
= zalloc(pthread_zone_threadlist
);
2147 bzero(tl
, sizeof(struct threadlist
));
2150 kret
= pthread_kern
->thread_create_workq_waiting(wq
->wq_task
, wq_unpark_continue
, tl
, &th
);
2151 if (kret
!= KERN_SUCCESS
) {
2152 PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed
| DBG_FUNC_NONE
, wq
, kret
, 0, 0, 0);
2156 mach_vm_offset_t stackaddr
= pthread_kern
->proc_get_stack_addr_hint(p
);
2158 mach_vm_size_t guardsize
= vm_map_page_size(wq
->wq_map
);
2159 mach_vm_size_t pthread_size
=
2160 vm_map_round_page_mask(pthread_kern
->proc_get_pthsize(p
) + PTHREAD_T_OFFSET
, vm_map_page_mask(wq
->wq_map
));
2161 mach_vm_size_t th_allocsize
= guardsize
+ PTH_DEFAULT_STACKSIZE
+ pthread_size
;
2163 kret
= mach_vm_map(wq
->wq_map
, &stackaddr
,
2164 th_allocsize
, page_size
-1,
2165 VM_MAKE_TAG(VM_MEMORY_STACK
)| VM_FLAGS_ANYWHERE
, NULL
,
2166 0, FALSE
, VM_PROT_DEFAULT
, VM_PROT_ALL
,
2167 VM_INHERIT_DEFAULT
);
2169 if (kret
!= KERN_SUCCESS
) {
2170 kret
= mach_vm_allocate(wq
->wq_map
,
2171 &stackaddr
, th_allocsize
,
2172 VM_MAKE_TAG(VM_MEMORY_STACK
) | VM_FLAGS_ANYWHERE
);
2175 if (kret
!= KERN_SUCCESS
) {
2176 PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed
| DBG_FUNC_NONE
, wq
, kret
, 1, 0, 0);
2177 goto fail_terminate
;
2181 * The guard page is at the lowest address
2182 * The stack base is the highest address
2184 kret
= mach_vm_protect(wq
->wq_map
, stackaddr
, guardsize
, FALSE
, VM_PROT_NONE
);
2185 if (kret
!= KERN_SUCCESS
) {
2186 PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed
| DBG_FUNC_NONE
, wq
, kret
, 2, 0, 0);
2187 goto fail_vm_deallocate
;
2191 pthread_kern
->thread_set_tag(th
, THREAD_TAG_PTHREAD
| THREAD_TAG_WORKQUEUE
);
2192 pthread_kern
->thread_static_param(th
, TRUE
);
2195 * convert_thread_to_port() consumes a reference
2197 thread_reference(th
);
2198 void *sright
= (void *)pthread_kern
->convert_thread_to_port(th
);
2199 tl
->th_thport
= pthread_kern
->ipc_port_copyout_send(sright
,
2200 pthread_kern
->task_get_ipcspace(wq
->wq_task
));
2202 tl
->th_flags
= TH_LIST_INITED
| TH_LIST_NEW
;
2205 tl
->th_stackaddr
= stackaddr
;
2206 tl
->th_priority
= WORKQUEUE_NUM_BUCKETS
;
2208 struct uthread
*uth
;
2209 uth
= pthread_kern
->get_bsdthread_info(tl
->th_thread
);
2211 workqueue_lock_spin(wq
);
2213 void *current_tl
= pthread_kern
->uthread_get_threadlist(uth
);
2214 if (current_tl
== NULL
) {
2215 pthread_kern
->uthread_set_threadlist(uth
, tl
);
2216 TAILQ_INSERT_TAIL(&wq
->wq_thidlelist
, tl
, th_entry
);
2217 wq
->wq_thidlecount
++;
2218 } else if (current_tl
== WQ_THREADLIST_EXITING_POISON
) {
2220 * Failed thread creation race: The thread already woke up and has exited.
2222 PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed
| DBG_FUNC_NONE
, wq
, kret
, 3, 0, 0);
2225 panic("Unexpected initial threadlist value");
2228 PTHREAD_TRACE_WQ(TRACE_wq_thread_create
| DBG_FUNC_NONE
, wq
, 0, 0, 0, 0);
2233 workqueue_unlock(wq
);
2234 (void)pthread_kern
->mach_port_deallocate(pthread_kern
->task_get_ipcspace(wq
->wq_task
),
2238 (void) mach_vm_deallocate(wq
->wq_map
, stackaddr
, th_allocsize
);
2241 if (pthread_kern
->thread_will_park_or_terminate
) {
2242 pthread_kern
->thread_will_park_or_terminate(th
);
2244 (void)thread_terminate(th
);
2245 thread_deallocate(th
);
2248 zfree(pthread_zone_threadlist
, tl
);
2250 workqueue_lock_spin(wq
);
2257 * Setup per-process state for the workqueue.
2260 _workq_open(struct proc
*p
, __unused
int32_t *retval
)
2262 struct workqueue
* wq
;
2267 if (pthread_kern
->proc_get_register(p
) == 0) {
2271 num_cpus
= pthread_kern
->ml_get_max_cpus();
2273 if (wq_init_constrained_limit
) {
2276 * set up the limit for the constrained pool
2277 * this is a virtual pool in that we don't
2278 * maintain it on a separate idle and run list
2280 limit
= num_cpus
* WORKQUEUE_CONSTRAINED_FACTOR
;
2282 if (limit
> wq_max_constrained_threads
)
2283 wq_max_constrained_threads
= limit
;
2285 wq_init_constrained_limit
= 0;
2287 if (wq_max_threads
> WQ_THACTIVE_BUCKET_HALF
) {
2288 wq_max_threads
= WQ_THACTIVE_BUCKET_HALF
;
2290 if (wq_max_threads
> pthread_kern
->config_thread_max
- 20) {
2291 wq_max_threads
= pthread_kern
->config_thread_max
- 20;
2295 if (pthread_kern
->proc_get_wqptr(p
) == NULL
) {
2296 if (pthread_kern
->proc_init_wqptr_or_wait(p
) == FALSE
) {
2297 assert(pthread_kern
->proc_get_wqptr(p
) != NULL
);
2301 ptr
= (char *)zalloc(pthread_zone_workqueue
);
2302 bzero(ptr
, sizeof(struct workqueue
));
2304 wq
= (struct workqueue
*)ptr
;
2306 wq
->wq_task
= current_task();
2307 wq
->wq_map
= pthread_kern
->current_map();
2309 // Start the event manager at the priority hinted at by the policy engine
2310 int mgr_priority_hint
= pthread_kern
->task_get_default_manager_qos(current_task());
2311 wq
->wq_event_manager_priority
= (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint
) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
;
2313 TAILQ_INIT(&wq
->wq_thrunlist
);
2314 TAILQ_INIT(&wq
->wq_thidlelist
);
2315 for (int i
= 0; i
< WORKQUEUE_EVENT_MANAGER_BUCKET
; i
++) {
2316 TAILQ_INIT(&wq
->wq_overcommit_reqlist
[i
]);
2317 TAILQ_INIT(&wq
->wq_reqlist
[i
]);
2320 wq
->wq_atimer_delayed_call
=
2321 thread_call_allocate_with_priority((thread_call_func_t
)workqueue_add_timer
,
2322 (thread_call_param_t
)wq
, THREAD_CALL_PRIORITY_KERNEL
);
2323 wq
->wq_atimer_immediate_call
=
2324 thread_call_allocate_with_priority((thread_call_func_t
)workqueue_add_timer
,
2325 (thread_call_param_t
)wq
, THREAD_CALL_PRIORITY_KERNEL
);
2327 lck_spin_init(&wq
->wq_lock
, pthread_lck_grp
, pthread_lck_attr
);
2329 wq
->wq_cached_threadreq
= zalloc(pthread_zone_threadreq
);
2330 *(wq_thactive_t
*)&wq
->wq_thactive
=
2331 (wq_thactive_t
)WQ_THACTIVE_NO_PENDING_REQUEST
<<
2332 WQ_THACTIVE_QOS_SHIFT
;
2334 pthread_kern
->proc_set_wqptr(p
, wq
);
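/*
 * Worked example (illustrative only) of the constrained-pool sizing done in
 * _workq_open() above: the virtual pool limit is
 *
 *	limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR
 *
 * and wq_max_constrained_threads is only ever raised to that value, never
 * lowered, so a pre-existing value that is already larger (e.g. one set via
 * a tunable elsewhere in this file) is left alone.  The cap on wq_max_threads
 * applied just below it is independent of this computation.
 */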
2343 * Routine: workqueue_mark_exiting
2345 * Function: Mark the work queue such that new threads will not be added to the
2346 * work queue after we return.
2348 * Conditions: Called against the current process.
2351 _workqueue_mark_exiting(struct proc
*p
)
2353 struct workqueue
*wq
= pthread_kern
->proc_get_wqptr(p
);
2356 PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit
|DBG_FUNC_START
, wq
, 0, 0, 0, 0);
2358 workqueue_lock_spin(wq
);
2361 * We arm the add timer without holding the workqueue lock so we need
2362 * to synchronize with any running or soon to be running timers.
2364 * Threads that intend to arm the timer atomically OR
2365 * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if
2366 * WQ_EXITING is not present. So, once we have set WQ_EXITING, we can
2367 * be sure that no new RUNNING flags will be set, but still need to
2368 * wait for the already running timers to complete.
2370 * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so
2371 * the check for and sleep until clear is protected.
2373 uint64_t wq_flags
= _wq_flags_or_orig(wq
, WQ_EXITING
);
2375 if (wq_flags
& WQ_ATIMER_DELAYED_RUNNING
) {
2376 if (thread_call_cancel(wq
->wq_atimer_delayed_call
) == TRUE
) {
2377 wq_flags
= _wq_flags_and_orig(wq
, ~WQ_ATIMER_DELAYED_RUNNING
);
2380 if (wq_flags
& WQ_ATIMER_IMMEDIATE_RUNNING
) {
2381 if (thread_call_cancel(wq
->wq_atimer_immediate_call
) == TRUE
) {
2382 wq_flags
= _wq_flags_and_orig(wq
, ~WQ_ATIMER_IMMEDIATE_RUNNING
);
2385 while ((_wq_flags(wq
) & (WQ_ATIMER_DELAYED_RUNNING
| WQ_ATIMER_IMMEDIATE_RUNNING
)) ||
2386 (wq
->wq_lflags
& WQL_ATIMER_BUSY
)) {
2387 assert_wait((caddr_t
)wq
, (THREAD_UNINT
));
2388 workqueue_unlock(wq
);
2390 thread_block(THREAD_CONTINUE_NULL
);
2392 workqueue_lock_spin(wq
);
2396 * Save off pending requests, will complete/free them below after unlocking
2398 TAILQ_HEAD(, threadreq
) local_list
= TAILQ_HEAD_INITIALIZER(local_list
);
2400 for (int i
= 0; i
< WORKQUEUE_EVENT_MANAGER_BUCKET
; i
++) {
2401 TAILQ_CONCAT(&local_list
, &wq
->wq_overcommit_reqlist
[i
], tr_entry
);
2402 TAILQ_CONCAT(&local_list
, &wq
->wq_reqlist
[i
], tr_entry
);
2406 * XXX: Can't deferred cancel the event manager request, so just smash it.
2408 assert((wq
->wq_event_manager_threadreq
.tr_flags
& TR_FLAG_WORKLOOP
) == 0);
2409 wq
->wq_event_manager_threadreq
.tr_state
= TR_STATE_DEAD
;
2411 workqueue_unlock(wq
);
2413 struct threadreq
*tr
, *tr_temp
;
2414 TAILQ_FOREACH_SAFE(tr
, &local_list
, tr_entry
, tr_temp
) {
2415 _threadreq_cancel(wq
, tr
);
2417 PTHREAD_TRACE(TRACE_wq_pthread_exit
|DBG_FUNC_END
, 0, 0, 0, 0, 0);
2421 * Routine: workqueue_exit
2423 * Function: clean up the work queue structure(s) now that there are no threads
2424 * left running inside the work queue (except possibly current_thread).
2426 * Conditions: Called by the last thread in the process.
2427 * Called against current process.
2430 _workqueue_exit(struct proc
*p
)
2432 struct workqueue
* wq
;
2433 struct threadlist
* tl
, *tlist
;
2434 struct uthread
*uth
;
2436 wq
= pthread_kern
->proc_get_wqptr(p
);
2439 PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit
|DBG_FUNC_START
, wq
, 0, 0, 0, 0);
2441 pthread_kern
->proc_set_wqptr(p
, NULL
);
2444 * Clean up workqueue data structures for threads that exited and
2445 * didn't get a chance to clean up after themselves.
2447 TAILQ_FOREACH_SAFE(tl
, &wq
->wq_thrunlist
, th_entry
, tlist
) {
2448 assert((tl
->th_flags
& TH_LIST_RUNNING
) != 0);
2450 pthread_kern
->thread_sched_call(tl
->th_thread
, NULL
);
2452 uth
= pthread_kern
->get_bsdthread_info(tl
->th_thread
);
2453 if (uth
!= (struct uthread
*)0) {
2454 pthread_kern
->uthread_set_threadlist(uth
, NULL
);
2456 TAILQ_REMOVE(&wq
->wq_thrunlist
, tl
, th_entry
);
2459 * drop our last ref on the thread
2461 thread_deallocate(tl
->th_thread
);
2463 zfree(pthread_zone_threadlist
, tl
);
2465 TAILQ_FOREACH_SAFE(tl
, &wq
->wq_thidlelist
, th_entry
, tlist
) {
2466 assert((tl
->th_flags
& TH_LIST_RUNNING
) == 0);
2467 assert(tl
->th_priority
!= WORKQUEUE_EVENT_MANAGER_BUCKET
);
2468 workqueue_removethread(tl
, true, false);
2470 TAILQ_FOREACH_SAFE(tl
, &wq
->wq_thidlemgrlist
, th_entry
, tlist
) {
2471 assert((tl
->th_flags
& TH_LIST_RUNNING
) == 0);
2472 assert(tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
);
2473 workqueue_removethread(tl
, true, false);
2475 if (wq
->wq_cached_threadreq
) {
2476 zfree(pthread_zone_threadreq
, wq
->wq_cached_threadreq
);
2478 thread_call_free(wq
->wq_atimer_delayed_call
);
2479 thread_call_free(wq
->wq_atimer_immediate_call
);
2480 lck_spin_destroy(&wq
->wq_lock
, pthread_lck_grp
);
2482 for (int i
= 0; i
< WORKQUEUE_EVENT_MANAGER_BUCKET
; i
++) {
2483 assert(TAILQ_EMPTY(&wq
->wq_overcommit_reqlist
[i
]));
2484 assert(TAILQ_EMPTY(&wq
->wq_reqlist
[i
]));
2487 zfree(pthread_zone_workqueue
, wq
);
2489 PTHREAD_TRACE(TRACE_wq_workqueue_exit
|DBG_FUNC_END
, 0, 0, 0, 0, 0);
2494 #pragma mark workqueue thread manipulation
2498 * Entry point for libdispatch to ask for threads
2501 wqops_queue_reqthreads(struct proc
*p
, int reqcount
,
2502 pthread_priority_t priority
)
2504 bool overcommit
= _pthread_priority_get_flags(priority
) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
;
2505 bool event_manager
= _pthread_priority_get_flags(priority
) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
;
2506 int class = event_manager
? WORKQUEUE_EVENT_MANAGER_BUCKET
:
2507 pthread_priority_get_class_index(priority
);
2509 if ((reqcount
<= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS
) ||
2510 (overcommit
&& event_manager
)) {
2514 struct workqueue
*wq
;
2515 if ((wq
= (struct workqueue
*)pthread_kern
->proc_get_wqptr(p
)) == NULL
) {
2519 workqueue_lock_spin(wq
);
2520 _threadreq_copy_prepare(wq
);
2522 PTHREAD_TRACE_WQ(TRACE_wq_wqops_reqthreads
| DBG_FUNC_NONE
, wq
, reqcount
, priority
, 0, 0);
2525 if (overcommit
) tr_flags
|= TR_FLAG_OVERCOMMIT
;
2528 * when libdispatch asks for more than one thread, it wants to achieve
2529 * parallelism. Pacing would be detrimental to this ask, so treat
2530 * these specially to not do the pacing admission check
2532 tr_flags
|= TR_FLAG_NO_PACING
;
2535 while (reqcount
-- && !_wq_exiting(wq
)) {
2536 struct threadreq req
;
2537 _threadreq_init_stack(&req
, class, tr_flags
);
2539 workqueue_run_threadreq_and_unlock(p
, wq
, NULL
, &req
, true);
2541 workqueue_lock_spin(wq
); /* reacquire */
2542 _threadreq_copy_prepare(wq
);
2545 workqueue_unlock(wq
);
2551 * Used by the kevent system to request threads.
2553 * Currently count is ignored and we always return one thread per invocation.
2556 _workq_kevent_reqthreads(struct proc
*p
, pthread_priority_t priority
,
2559 int wq_run_tr
= WQ_RUN_TR_THROTTLED
;
2560 bool emergency_thread
= false;
2561 struct threadreq req
;
2564 struct workqueue
*wq
;
2565 if ((wq
= (struct workqueue
*)pthread_kern
->proc_get_wqptr(p
)) == NULL
) {
2569 int class = pthread_priority_get_class_index(priority
);
2571 workqueue_lock_spin(wq
);
2572 bool has_threadreq
= _threadreq_copy_prepare_noblock(wq
);
2574 PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads
| DBG_FUNC_NONE
, wq
, NULL
, priority
, 0, 0);
2577 * Skip straight to event manager if that's what was requested
2579 if ((_pthread_priority_get_qos_newest(priority
) == QOS_CLASS_UNSPECIFIED
) ||
2580 (_pthread_priority_get_flags(priority
) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
)){
2584 bool will_pace
= _wq_should_pace_priority(wq
, class);
2585 if ((wq
->wq_thidlecount
== 0 || will_pace
) && has_threadreq
== false) {
2587 * We'll need to persist the request and can't, so return the emergency
2588 * thread instead, which has a persistent request object.
2590 emergency_thread
= true;
2595 * Handle overcommit requests
2597 if ((_pthread_priority_get_flags(priority
) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
) != 0){
2598 _threadreq_init_stack(&req
, class, TR_FLAG_KEVENT
| TR_FLAG_OVERCOMMIT
);
2599 wq_run_tr
= workqueue_run_threadreq_and_unlock(p
, wq
, NULL
, &req
, false);
2604 * Handle constrained requests
2606 boolean_t may_start
= may_start_constrained_thread(wq
, class, NULL
, false);
2607 if (may_start
|| no_emergency
) {
2608 _threadreq_init_stack(&req
, class, TR_FLAG_KEVENT
);
2609 wq_run_tr
= workqueue_run_threadreq_and_unlock(p
, wq
, NULL
, &req
, false);
2612 emergency_thread
= true;
2617 _threadreq_init_stack(&req
, WORKQUEUE_EVENT_MANAGER_BUCKET
, TR_FLAG_KEVENT
);
2618 wq_run_tr
= workqueue_run_threadreq_and_unlock(p
, wq
, NULL
, &req
, false);
2621 if (wq_run_tr
== WQ_RUN_TR_THREAD_NEEDED
&& WQ_TIMER_IMMEDIATE_NEEDED(wq
)) {
2622 workqueue_interval_timer_trigger(wq
);
2624 return emergency_thread
? (void*)-1 : 0;
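/*
 * Summary of the fallback decision above (illustrative): the emergency
 * (event manager) path is chosen whenever a request cannot be persisted or
 * admitted right now:
 *
 *	no idle thread (or pacing active), and no cached request  -> emergency
 *	constrained admission check fails and !no_emergency       -> emergency
 *	otherwise                                                  -> queue/run the request
 */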
2628 _workq_reqthreads(struct proc
*p
, __assert_only
int requests_count
,
2629 workq_reqthreads_req_t request
)
2631 assert(requests_count
== 1);
2633 pthread_priority_t priority
= request
->priority
;
2634 bool no_emergency
= request
->count
& WORKQ_REQTHREADS_NOEMERGENCY
;
2636 return _workq_kevent_reqthreads(p
, priority
, no_emergency
);
2641 workq_kern_threadreq(struct proc
*p
, workq_threadreq_t _req
,
2642 enum workq_threadreq_type type
, unsigned long priority
, int flags
)
2644 struct workqueue
*wq
;
2647 if ((wq
= (struct workqueue
*)pthread_kern
->proc_get_wqptr(p
)) == NULL
) {
2652 case WORKQ_THREADREQ_KEVENT
: {
2653 bool no_emergency
= flags
& WORKQ_THREADREQ_FLAG_NOEMERGENCY
;
2654 (void)_workq_kevent_reqthreads(p
, priority
, no_emergency
);
2657 case WORKQ_THREADREQ_WORKLOOP
:
2658 case WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
: {
2659 struct threadreq
*req
= (struct threadreq
*)_req
;
2660 int req_class
= pthread_priority_get_class_index(priority
);
2661 int req_flags
= TR_FLAG_WORKLOOP
;
2662 if ((_pthread_priority_get_flags(priority
) &
2663 _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
) != 0){
2664 req_flags
|= TR_FLAG_OVERCOMMIT
;
2667 thread_t thread
= current_thread();
2668 struct threadlist
*tl
= util_get_thread_threadlist_entry(thread
);
2670 if (tl
&& tl
!= WQ_THREADLIST_EXITING_POISON
&&
2671 (tl
->th_flags
& TH_LIST_UNBINDING
)) {
2673 * we're called back synchronously from the context of
2674 * kevent_qos_internal_unbind from within wqops_thread_return()
2675 * we can try to match up this thread with this request !
2681 _threadreq_init_alloced(req
, req_class
, req_flags
);
2682 workqueue_lock_spin(wq
);
2683 PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads
| DBG_FUNC_NONE
, wq
, req
, priority
, 1, 0);
2684 ret
= workqueue_run_threadreq_and_unlock(p
, wq
, tl
, req
, false);
2685 if (ret
== WQ_RUN_TR_EXITING
) {
2688 if (ret
== WQ_RUN_TR_THREAD_NEEDED
) {
2689 if (type
== WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL
) {
2692 if (WQ_TIMER_IMMEDIATE_NEEDED(wq
)) {
2693 workqueue_interval_timer_trigger(wq
);
2698 case WORKQ_THREADREQ_REDRIVE
:
2699 PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads
| DBG_FUNC_NONE
, wq
, 0, 0, 4, 0);
2700 workqueue_lock_spin(wq
);
2701 ret
= workqueue_run_threadreq_and_unlock(p
, wq
, NULL
, NULL
, true);
2702 if (ret
== WQ_RUN_TR_EXITING
) {
2712 workq_kern_threadreq_modify(struct proc
*p
, workq_threadreq_t _req
,
2713 enum workq_threadreq_op operation
, unsigned long arg1
,
2714 unsigned long __unused arg2
)
2716 struct threadreq
*req
= (struct threadreq
*)_req
;
2717 struct workqueue
*wq
;
2718 int priclass
, ret
= 0, wq_tr_rc
= WQ_RUN_TR_THROTTLED
;
2720 if (req
== NULL
|| (wq
= pthread_kern
->proc_get_wqptr(p
)) == NULL
) {
2724 workqueue_lock_spin(wq
);
2726 if (_wq_exiting(wq
)) {
2732 * Find/validate the referenced request structure
2734 if (req
->tr_state
!= TR_STATE_WAITING
) {
2738 assert(req
->tr_priority
< WORKQUEUE_EVENT_MANAGER_BUCKET
);
2739 assert(req
->tr_flags
& TR_FLAG_WORKLOOP
);
2741 switch (operation
) {
2742 case WORKQ_THREADREQ_CHANGE_PRI
:
2743 case WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL
:
2744 priclass
= pthread_priority_get_class_index(arg1
);
2745 PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads
| DBG_FUNC_NONE
, wq
, req
, arg1
, 2, 0);
2746 if (req
->tr_priority
== priclass
) {
2749 _threadreq_dequeue(wq
, req
);
2750 req
->tr_priority
= priclass
;
2751 req
->tr_state
= TR_STATE_NEW
; // what was old is new again
2752 wq_tr_rc
= workqueue_run_threadreq_and_unlock(p
, wq
, NULL
, req
, false);
2755 case WORKQ_THREADREQ_CANCEL
:
2756 PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads
| DBG_FUNC_NONE
, wq
, req
, 0, 3, 0);
2757 _threadreq_dequeue(wq
, req
);
2758 req
->tr_state
= TR_STATE_DEAD
;
2767 workqueue_unlock(wq
);
2769 if (wq_tr_rc
== WQ_RUN_TR_THREAD_NEEDED
) {
2770 if (operation
== WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL
) {
2772 } else if (WQ_TIMER_IMMEDIATE_NEEDED(wq
)) {
2773 workqueue_interval_timer_trigger(wq
);
2781 wqops_thread_return(struct proc
*p
, struct workqueue
*wq
)
2783 thread_t th
= current_thread();
2784 struct uthread
*uth
= pthread_kern
->get_bsdthread_info(th
);
2785 struct threadlist
*tl
= pthread_kern
->uthread_get_threadlist(uth
);
2787 /* reset signal mask on the workqueue thread to default state */
2788 if (pthread_kern
->uthread_get_sigmask(uth
) != (sigset_t
)(~workq_threadmask
)) {
2789 pthread_kern
->proc_lock(p
);
2790 pthread_kern
->uthread_set_sigmask(uth
, ~workq_threadmask
);
2791 pthread_kern
->proc_unlock(p
);
2794 if (wq
== NULL
|| !tl
) {
2798 PTHREAD_TRACE_WQ(TRACE_wq_override_reset
| DBG_FUNC_START
, tl
->th_workq
, 0, 0, 0, 0);
2801 * This squash call has neat semantics: it removes the specified overrides,
2802 * replacing the current requested QoS with the previous effective QoS from
2803 * those overrides. This means we won't be preempted due to having our QoS
2804 * lowered. Of course, now our understanding of the thread's QoS is wrong,
2805 * so we'll adjust below.
2807 bool was_manager
= (tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
);
2811 new_qos
= pthread_kern
->proc_usynch_thread_qos_squash_override_for_resource(th
,
2812 THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD
,
2813 THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE
);
2816 PTHREAD_TRACE_WQ(TRACE_wq_runitem
| DBG_FUNC_END
, wq
, tl
->th_priority
, 0, 0, 0);
2818 workqueue_lock_spin(wq
);
2820 if (tl
->th_flags
& TH_LIST_KEVENT_BOUND
) {
2821 unsigned int flags
= KEVENT_FLAG_WORKQ
;
2823 flags
|= KEVENT_FLAG_WORKQ_MANAGER
;
2826 tl
->th_flags
|= TH_LIST_UNBINDING
;
2827 workqueue_unlock(wq
);
2828 kevent_qos_internal_unbind(p
, class_index_get_thread_qos(tl
->th_priority
), th
, flags
);
2829 if (!(tl
->th_flags
& TH_LIST_UNBINDING
)) {
2830 _setup_wqthread(p
, th
, wq
, tl
, WQ_SETUP_CLEAR_VOUCHER
);
2831 pthread_kern
->unix_syscall_return(EJUSTRETURN
);
2832 __builtin_unreachable();
2834 workqueue_lock_spin(wq
);
2835 tl
->th_flags
&= ~(TH_LIST_KEVENT_BOUND
| TH_LIST_UNBINDING
);
2839 /* Fix up counters from the squash operation. */
2840 uint8_t old_bucket
= tl
->th_priority
;
2841 uint8_t new_bucket
= thread_qos_get_class_index(new_qos
);
2843 if (old_bucket
!= new_bucket
) {
2844 _wq_thactive_move(wq
, old_bucket
, new_bucket
);
2845 wq
->wq_thscheduled_count
[old_bucket
]--;
2846 wq
->wq_thscheduled_count
[new_bucket
]++;
2848 PTHREAD_TRACE_WQ(TRACE_wq_thread_squash
| DBG_FUNC_NONE
, wq
, tl
->th_priority
, new_bucket
, 0, 0);
2849 tl
->th_priority
= new_bucket
;
2850 PTHREAD_TRACE_WQ(TRACE_wq_override_reset
| DBG_FUNC_END
, tl
->th_workq
, new_qos
, 0, 0, 0);
2854 workqueue_run_threadreq_and_unlock(p
, wq
, tl
, NULL
, false);
2859 * Multiplexed call to interact with the workqueue mechanism
2862 _workq_kernreturn(struct proc
*p
,
2869 struct workqueue
*wq
;
2872 if (pthread_kern
->proc_get_register(p
) == 0) {
2877 case WQOPS_QUEUE_NEWSPISUPP
: {
2879 * arg2 = offset of serialno into dispatch queue
2880 * arg3 = kevent support
2884 // If we get here, then userspace has indicated support for kevent delivery.
2887 pthread_kern
->proc_set_dispatchqueue_serialno_offset(p
, (uint64_t)offset
);
2890 case WQOPS_QUEUE_REQTHREADS
: {
2892 * arg2 = number of threads to start
2895 error
= wqops_queue_reqthreads(p
, arg2
, arg3
);
2898 case WQOPS_SET_EVENT_MANAGER_PRIORITY
: {
2900 * arg2 = priority for the manager thread
2902 * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the
2903 * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead
2906 pthread_priority_t pri
= arg2
;
2908 wq
= (struct workqueue
*)pthread_kern
->proc_get_wqptr(p
);
2913 workqueue_lock_spin(wq
);
2914 if (pri
& _PTHREAD_PRIORITY_SCHED_PRI_FLAG
){
 * If userspace passes a scheduling priority, that takes precedence
 * over any QoS. (So, userspace should take care not to accidentally
 * lower the priority this way.)
2920 uint32_t sched_pri
= pri
& _PTHREAD_PRIORITY_SCHED_PRI_MASK
;
2921 if (wq
->wq_event_manager_priority
& _PTHREAD_PRIORITY_SCHED_PRI_FLAG
){
2922 wq
->wq_event_manager_priority
= MAX(sched_pri
, wq
->wq_event_manager_priority
& _PTHREAD_PRIORITY_SCHED_PRI_MASK
)
2923 | _PTHREAD_PRIORITY_SCHED_PRI_FLAG
| _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
;
2925 wq
->wq_event_manager_priority
= sched_pri
2926 | _PTHREAD_PRIORITY_SCHED_PRI_FLAG
| _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
;
2928 } else if ((wq
->wq_event_manager_priority
& _PTHREAD_PRIORITY_SCHED_PRI_FLAG
) == 0){
2929 int cur_qos
= pthread_priority_get_thread_qos(wq
->wq_event_manager_priority
);
2930 int new_qos
= pthread_priority_get_thread_qos(pri
);
2931 wq
->wq_event_manager_priority
= (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos
, new_qos
)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
;
2933 workqueue_unlock(wq
);
2936 case WQOPS_THREAD_KEVENT_RETURN
:
2937 case WQOPS_THREAD_WORKLOOP_RETURN
:
2938 wq
= (struct workqueue
*)pthread_kern
->proc_get_wqptr(p
);
2939 PTHREAD_TRACE_WQ(TRACE_wq_runthread
| DBG_FUNC_END
, wq
, options
, 0, 0, 0);
2940 if (item
!= 0 && arg2
!= 0) {
2941 int32_t kevent_retval
;
2943 if (options
== WQOPS_THREAD_KEVENT_RETURN
) {
2944 ret
= kevent_qos_internal(p
, -1, item
, arg2
, item
, arg2
, NULL
, NULL
,
2945 KEVENT_FLAG_WORKQ
| KEVENT_FLAG_IMMEDIATE
| KEVENT_FLAG_ERROR_EVENTS
,
2947 } else /* options == WQOPS_THREAD_WORKLOOP_RETURN */ {
2948 kqueue_id_t kevent_id
= -1;
2949 ret
= kevent_id_internal(p
, &kevent_id
, item
, arg2
, item
, arg2
,
2951 KEVENT_FLAG_WORKLOOP
| KEVENT_FLAG_IMMEDIATE
| KEVENT_FLAG_ERROR_EVENTS
,
2955 * We shouldn't be getting more errors out than events we put in, so
2956 * reusing the input buffer should always provide enough space. But,
2957 * the assert is commented out since we get errors in edge cases in the
2958 * process lifecycle.
2960 //assert(ret == KERN_SUCCESS && kevent_retval >= 0);
2961 if (ret
!= KERN_SUCCESS
){
2964 } else if (kevent_retval
> 0){
2965 assert(kevent_retval
<= arg2
);
2966 *retval
= kevent_retval
;
2973 case WQOPS_THREAD_RETURN
:
2974 wq
= (struct workqueue
*)pthread_kern
->proc_get_wqptr(p
);
2975 PTHREAD_TRACE_WQ(TRACE_wq_runthread
| DBG_FUNC_END
, wq
, options
, 0, 0, 0);
2977 error
= wqops_thread_return(p
, wq
);
2978 // NOT REACHED except in case of error
2982 case WQOPS_SHOULD_NARROW
: {
2984 * arg2 = priority to test
2987 pthread_priority_t priority
= arg2
;
2988 thread_t th
= current_thread();
2989 struct threadlist
*tl
= util_get_thread_threadlist_entry(th
);
2991 if (tl
== NULL
|| (tl
->th_flags
& TH_LIST_CONSTRAINED
) == 0) {
2996 int class = pthread_priority_get_class_index(priority
);
2998 workqueue_lock_spin(wq
);
2999 bool should_narrow
= !may_start_constrained_thread(wq
, class, tl
, false);
3000 workqueue_unlock(wq
);
3002 *retval
= should_narrow
;
3011 case WQOPS_THREAD_KEVENT_RETURN
:
3012 case WQOPS_THREAD_WORKLOOP_RETURN
:
3013 case WQOPS_THREAD_RETURN
:
3014 PTHREAD_TRACE_WQ(TRACE_wq_runthread
| DBG_FUNC_START
, wq
, options
, 0, 0, 0);
3021 * We have no work to do, park ourselves on the idle list.
3023 * Consumes the workqueue lock and does not return.
3026 parkit(struct workqueue
*wq
, struct threadlist
*tl
, thread_t thread
)
3028 assert(thread
== tl
->th_thread
);
3029 assert(thread
== current_thread());
3031 PTHREAD_TRACE_WQ(TRACE_wq_thread_park
| DBG_FUNC_START
, wq
, 0, 0, 0, 0);
3033 uint32_t us_to_wait
= 0;
3035 TAILQ_REMOVE(&wq
->wq_thrunlist
, tl
, th_entry
);
3037 tl
->th_flags
&= ~TH_LIST_RUNNING
;
3038 tl
->th_flags
&= ~TH_LIST_KEVENT
;
3039 assert((tl
->th_flags
& TH_LIST_KEVENT_BOUND
) == 0);
3041 if (tl
->th_flags
& TH_LIST_CONSTRAINED
) {
3042 wq
->wq_constrained_threads_scheduled
--;
3043 tl
->th_flags
&= ~TH_LIST_CONSTRAINED
;
3046 _wq_thactive_dec(wq
, tl
->th_priority
);
3047 wq
->wq_thscheduled_count
[tl
->th_priority
]--;
3048 wq
->wq_threads_scheduled
--;
3049 uint32_t thidlecount
= ++wq
->wq_thidlecount
;
3051 pthread_kern
->thread_sched_call(thread
, NULL
);
3054 * We'd like to always have one manager thread parked so that we can have
3055 * low latency when we need to bring a manager thread up. If that idle
3056 * thread list is empty, make this thread a manager thread.
3058 * XXX: This doesn't check that there's not a manager thread outstanding,
3059 * so it's based on the assumption that most manager callouts will change
3060 * their QoS before parking. If that stops being true, this may end up
3061 * costing us more than we gain.
3063 if (TAILQ_EMPTY(&wq
->wq_thidlemgrlist
) &&
3064 tl
->th_priority
!= WORKQUEUE_EVENT_MANAGER_BUCKET
){
3065 PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority
| DBG_FUNC_NONE
,
3066 wq
, thread_tid(thread
),
3067 (tl
->th_priority
<< 16) | WORKQUEUE_EVENT_MANAGER_BUCKET
, 2, 0);
3068 reset_priority(tl
, pthread_priority_from_wq_class_index(wq
, WORKQUEUE_EVENT_MANAGER_BUCKET
));
3069 tl
->th_priority
= WORKQUEUE_EVENT_MANAGER_BUCKET
;
3072 if (tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
){
3073 TAILQ_INSERT_HEAD(&wq
->wq_thidlemgrlist
, tl
, th_entry
);
3075 TAILQ_INSERT_HEAD(&wq
->wq_thidlelist
, tl
, th_entry
);
 * When we remove the voucher from the thread, we may lose our importance
 * causing us to get preempted, so we do this after putting the thread on
 * the idle list. That way, when we get our importance back we'll be able
 * to use this thread from e.g. the kevent call out to deliver a boosting
3085 tl
->th_flags
|= TH_LIST_REMOVING_VOUCHER
;
3086 workqueue_unlock(wq
);
3087 if (pthread_kern
->thread_will_park_or_terminate
) {
3088 pthread_kern
->thread_will_park_or_terminate(tl
->th_thread
);
3090 __assert_only kern_return_t kr
;
3091 kr
= pthread_kern
->thread_set_voucher_name(MACH_PORT_NULL
);
3092 assert(kr
== KERN_SUCCESS
);
3093 workqueue_lock_spin(wq
);
3094 tl
->th_flags
&= ~(TH_LIST_REMOVING_VOUCHER
);
3096 if ((tl
->th_flags
& TH_LIST_RUNNING
) == 0) {
3097 if (thidlecount
< 101) {
3098 us_to_wait
= wq_reduce_pool_window_usecs
- ((thidlecount
-2) * (wq_reduce_pool_window_usecs
/ 100));
3100 us_to_wait
= wq_reduce_pool_window_usecs
/ 100;
3103 thread_set_pending_block_hint(thread
, kThreadWaitParkedWorkQueue
);
3104 assert_wait_timeout_with_leeway((caddr_t
)tl
, (THREAD_INTERRUPTIBLE
),
3105 TIMEOUT_URGENCY_SYS_BACKGROUND
|TIMEOUT_URGENCY_LEEWAY
, us_to_wait
,
3106 wq_reduce_pool_window_usecs
/10, NSEC_PER_USEC
);
3108 workqueue_unlock(wq
);
3110 thread_block(wq_unpark_continue
);
3111 panic("thread_block(wq_unpark_continue) returned!");
3113 workqueue_unlock(wq
);
3116 * While we'd dropped the lock to unset our voucher, someone came
3117 * around and made us runnable. But because we weren't waiting on the
3118 * event their wakeup() was ineffectual. To correct for that, we just
3119 * run the continuation ourselves.
3121 wq_unpark_continue(NULL
, THREAD_AWAKENED
);
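/*
 * Worked example (illustrative) of the park timeout computed above, assuming
 * the default wq_reduce_pool_window_usecs of 5 seconds (5,000,000 us):
 *
 *	thidlecount ==   2  ->  us_to_wait = 5,000,000                     (full window)
 *	thidlecount ==  52  ->  us_to_wait = 5,000,000 - 50 * 50,000 = 2,500,000
 *	thidlecount >= 101  ->  us_to_wait = 50,000                        (window / 100)
 *
 * so the more idle threads are already parked, the sooner this one times out
 * and self-destructs, draining an oversized pool gradually rather than all
 * at once.
 */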
3126 may_start_constrained_thread(struct workqueue
*wq
, uint32_t at_priclass
,
3127 struct threadlist
*tl
, bool may_start_timer
)
3129 uint32_t req_qos
= _wq_thactive_best_constrained_req_qos(wq
);
3130 wq_thactive_t thactive
;
3132 if (may_start_timer
&& at_priclass
< req_qos
) {
3134 * When called from workqueue_run_threadreq_and_unlock() pre-post newest
3135 * higher priorities into the thactive state so that
3136 * workqueue_callback() takes the right decision.
3138 * If the admission check passes, workqueue_run_threadreq_and_unlock
3139 * will reset this value before running the request.
3141 thactive
= _wq_thactive_set_best_constrained_req_qos(wq
, req_qos
,
3144 PTHREAD_TRACE_WQ(TRACE_wq_thactive_update
, 1, (uint64_t)thactive
,
3145 (uint64_t)(thactive
>> 64), 0, 0);
3148 thactive
= _wq_thactive(wq
);
3151 uint32_t constrained_threads
= wq
->wq_constrained_threads_scheduled
;
3152 if (tl
&& (tl
->th_flags
& TH_LIST_CONSTRAINED
)) {
3154 * don't count the current thread as scheduled
3156 constrained_threads
--;
3158 if (constrained_threads
>= wq_max_constrained_threads
) {
3159 PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission
| DBG_FUNC_NONE
, wq
, 1,
3160 wq
->wq_constrained_threads_scheduled
,
3161 wq_max_constrained_threads
, 0);
3163 * we need 1 or more constrained threads to return to the kernel before
3164 * we can dispatch additional work
 * Compute a metric for how many threads are active. We find the
3171 * highest priority request outstanding and then add up the number of
3172 * active threads in that and all higher-priority buckets. We'll also add
3173 * any "busy" threads which are not active but blocked recently enough that
3174 * we can't be sure they've gone idle yet. We'll then compare this metric
3175 * to our max concurrency to decide whether to add a new thread.
3178 uint32_t busycount
, thactive_count
;
3180 thactive_count
= _wq_thactive_aggregate_downto_qos(wq
, thactive
,
3181 at_priclass
, &busycount
, NULL
);
3183 if (tl
&& tl
->th_priority
<= at_priclass
) {
3185 * don't count this thread as currently active
3187 assert(thactive_count
> 0);
3191 if (thactive_count
+ busycount
< wq_max_concurrency
[at_priclass
]) {
3192 PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission
| DBG_FUNC_NONE
, wq
, 2,
3193 thactive_count
, busycount
, 0);
3196 PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission
| DBG_FUNC_NONE
, wq
, 3,
3197 thactive_count
, busycount
, 0);
3200 if (busycount
&& may_start_timer
) {
3202 * If this is called from the add timer, we won't have another timer
3203 * fire when the thread exits the "busy" state, so rearm the timer.
3205 if (WQ_TIMER_DELAYED_NEEDED(wq
)) {
3206 workqueue_interval_timer_start(wq
);
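/*
 * Illustrative sketch (not part of the build) of the admission metric used
 * above, written over plain counters instead of the packed wq_thactive
 * atomic state; all parameter names here are assumptions:
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	// max_concurrency is typically the CPU count for constrained work
 *	static bool
 *	may_admit(uint32_t active_at_or_above_qos, uint32_t recently_blocked,
 *			uint32_t constrained_scheduled, uint32_t max_constrained,
 *			uint32_t max_concurrency)
 *	{
 *		if (constrained_scheduled >= max_constrained) {
 *			return false;	// global constrained-pool cap reached
 *		}
 *		// count "busy" (recently blocked) threads as if they were still active
 *		return active_at_or_above_qos + recently_blocked < max_concurrency;
 *	}
 */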
static struct threadlist *
pop_from_thidlelist(struct workqueue *wq, uint32_t priclass)
{
	assert(wq->wq_thidlecount);

	struct threadlist *tl = NULL;

	if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
			(priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){
		tl = TAILQ_FIRST(&wq->wq_thidlemgrlist);
		TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
		assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
	} else if (!TAILQ_EMPTY(&wq->wq_thidlelist) &&
			(priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){
		tl = TAILQ_FIRST(&wq->wq_thidlelist);
		TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
		assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
	} else {
		panic("pop_from_thidlelist called with no threads available");
	}
	assert((tl->th_flags & TH_LIST_RUNNING) == 0);

	assert(wq->wq_thidlecount);
	wq->wq_thidlecount--;

	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;

	wq->wq_threads_scheduled++;
	wq->wq_thscheduled_count[priclass]++;
	_wq_thactive_inc(wq, priclass);

	return tl;
}
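/*
 * Selection summary for the two idle lists above (illustrative):
 *
 *	request bucket     idle mgr list   idle list    picked from
 *	EVENT_MANAGER      non-empty       any          mgr list
 *	non-manager        any             non-empty    idle list
 *	non-manager        non-empty       empty        mgr list
 *	EVENT_MANAGER      empty           non-empty    idle list
 *
 * i.e. the matching list is preferred, and the other list is only used when
 * the matching one is empty; pop_from_thidlelist() is never called with both
 * lists empty (the caller checks wq_thidlecount first).
 */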
static pthread_priority_t
pthread_priority_from_wq_class_index(struct workqueue *wq, int index)
{
	if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){
		return wq->wq_event_manager_priority;
	} else {
		return class_index_get_pthread_priority(index);
	}
}
static void
reset_priority(struct threadlist *tl, pthread_priority_t pri)
{
	kern_return_t ret;
	thread_t th = tl->th_thread;

	if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
		ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

		if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) {

			/* Reset priority to default (masked by QoS) */

			ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE);
			assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

			tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI;
		}
	} else {
		ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
		ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

		tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI;
	}
}
3288 * Picks the best request to run, and returns the best overcommit fallback
3289 * if the best pick is non overcommit and risks failing its admission check.
3291 static struct threadreq
*
3292 workqueue_best_threadreqs(struct workqueue
*wq
, struct threadlist
*tl
,
3293 struct threadreq
**fallback
)
3295 struct threadreq
*req
, *best_req
= NULL
;
3296 int priclass
, prilimit
;
3298 if ((wq
->wq_event_manager_threadreq
.tr_state
== TR_STATE_WAITING
) &&
3299 ((wq
->wq_thscheduled_count
[WORKQUEUE_EVENT_MANAGER_BUCKET
] == 0) ||
3300 (tl
&& tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
))) {
3302 * There's an event manager request and either:
3303 * - no event manager currently running
3304 * - we are re-using the event manager
3306 req
= &wq
->wq_event_manager_threadreq
;
3307 PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select
| DBG_FUNC_NONE
, wq
, req
, 1, 0, 0);
3312 prilimit
= WORKQUEUE_EVENT_MANAGER_BUCKET
;
3314 prilimit
= _wq_highest_paced_priority(wq
);
3316 for (priclass
= 0; priclass
< prilimit
; priclass
++) {
3317 req
= TAILQ_FIRST(&wq
->wq_overcommit_reqlist
[priclass
]);
3319 PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select
| DBG_FUNC_NONE
, wq
, req
, 2, 0, 0);
3328 best_req
= TAILQ_FIRST(&wq
->wq_reqlist
[priclass
]);
3330 PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select
| DBG_FUNC_NONE
, wq
, best_req
, 3, 0, 0);
3338 * Runs a thread request on a thread
3340 * - if thread is THREAD_NULL, will find a thread and run the request there.
3341 * Otherwise, the thread must be the current thread.
3343 * - if req is NULL, will find the highest priority request and run that. If
3344 * it is not NULL, it must be a threadreq object in state NEW. If it can not
3345 * be run immediately, it will be enqueued and moved to state WAITING.
3347 * Either way, the thread request object serviced will be moved to state
3348 * PENDING and attached to the threadlist.
3350 * Should be called with the workqueue lock held. Will drop it.
3352 * WARNING: _workq_kevent_reqthreads needs to be able to preflight any
3353 * admission checks in this function. If you are changing this function,
3354 * keep that one up-to-date.
3356 * - if parking_tl is non NULL, then the current thread is parking. This will
3357 * try to reuse this thread for a request. If no match is found, it will be
3361 workqueue_run_threadreq_and_unlock(proc_t p
, struct workqueue
*wq
,
3362 struct threadlist
*parking_tl
, struct threadreq
*req
,
3363 bool may_add_new_thread
)
3365 struct threadreq
*incoming_req
= req
;
3367 struct threadlist
*tl
= parking_tl
;
3368 int rc
= WQ_RUN_TR_THROTTLED
;
3370 assert(tl
== NULL
|| tl
->th_thread
== current_thread());
3371 assert(req
== NULL
|| req
->tr_state
== TR_STATE_NEW
);
3372 assert(!may_add_new_thread
|| !tl
);
3374 PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq
| DBG_FUNC_START
, wq
, req
,
3375 tl
? thread_tid(tl
->th_thread
) : 0,
3376 req
? (req
->tr_priority
<< 16 | req
->tr_flags
) : 0, 0);
3379 * Special cases when provided an event manager request
3381 if (req
&& req
->tr_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
) {
3382 // Clients must not rely on identity of event manager requests
3383 assert(req
->tr_flags
& TR_FLAG_ONSTACK
);
3384 // You can't be both overcommit and event manager
3385 assert((req
->tr_flags
& TR_FLAG_OVERCOMMIT
) == 0);
3388 * We can only ever have one event manager request, so coalesce them if
3389 * there's already one outstanding.
3391 if (wq
->wq_event_manager_threadreq
.tr_state
== TR_STATE_WAITING
) {
3392 PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_mgr_merge
| DBG_FUNC_NONE
, wq
, req
, 0, 0, 0);
3394 struct threadreq
*existing_req
= &wq
->wq_event_manager_threadreq
;
3395 if (req
->tr_flags
& TR_FLAG_KEVENT
) {
3396 existing_req
->tr_flags
|= TR_FLAG_KEVENT
;
3400 incoming_req
= NULL
;
3403 if (wq
->wq_thscheduled_count
[WORKQUEUE_EVENT_MANAGER_BUCKET
] &&
3404 (!tl
|| tl
->th_priority
!= WORKQUEUE_EVENT_MANAGER_BUCKET
)){
3406 * There can only be one event manager running at a time.
3408 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq
| DBG_FUNC_END
, wq
, 1, 0, 0, 0);
3413 again
: // Start again after creating a thread
3415 if (_wq_exiting(wq
)) {
3416 rc
= WQ_RUN_TR_EXITING
;
3421 * Thread request selection and admission control
3423 struct threadreq
*fallback
= NULL
;
3425 if ((req
->tr_flags
& TR_FLAG_NO_PACING
) == 0 &&
3426 _wq_should_pace_priority(wq
, req
->tr_priority
)) {
3428 * If a request fails the pacing admission check, then thread
3429 * requests are redriven when the pacing thread is finally scheduled
3430 * when it calls _wq_pacing_end() in wq_unpark_continue().
3434 } else if (wq
->wq_reqcount
== 0) {
3435 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq
| DBG_FUNC_END
, wq
, 2, 0, 0, 0);
3437 } else if ((req
= workqueue_best_threadreqs(wq
, tl
, &fallback
)) == NULL
) {
3438 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq
| DBG_FUNC_END
, wq
, 3, 0, 0, 0);
3442 if ((req
->tr_flags
& TR_FLAG_OVERCOMMIT
) == 0 &&
3443 (req
->tr_priority
< WORKQUEUE_EVENT_MANAGER_BUCKET
)) {
3444 if (!may_start_constrained_thread(wq
, req
->tr_priority
, parking_tl
, true)) {
3446 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq
| DBG_FUNC_END
, wq
, 4, 0, 0, 0);
3449 assert(req
->tr_state
== TR_STATE_WAITING
);
3458 if (tl
->th_priority
!= req
->tr_priority
) {
3459 _wq_thactive_move(wq
, tl
->th_priority
, req
->tr_priority
);
3460 wq
->wq_thscheduled_count
[tl
->th_priority
]--;
3461 wq
->wq_thscheduled_count
[req
->tr_priority
]++;
3463 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select
| DBG_FUNC_NONE
,
3464 wq
, 1, thread_tid(tl
->th_thread
), 0, 0);
3465 } else if (wq
->wq_thidlecount
) {
3466 tl
= pop_from_thidlelist(wq
, req
->tr_priority
);
3468 * This call will update wq_thscheduled_count and wq_thactive_count for
3469 * the provided priority. It will not set the returned thread to that
3470 * priority. This matches the behavior of the parking_tl clause above.
3472 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select
| DBG_FUNC_NONE
,
3473 wq
, 2, thread_tid(tl
->th_thread
), 0, 0);
3474 } else /* no idle threads */ {
3475 if (!may_add_new_thread
|| wq
->wq_nthreads
>= wq_max_threads
) {
3476 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq
| DBG_FUNC_END
, wq
, 5,
3477 may_add_new_thread
, wq
->wq_nthreads
, 0);
3478 if (wq
->wq_nthreads
< wq_max_threads
) {
3479 rc
= WQ_RUN_TR_THREAD_NEEDED
;
3484 bool added_thread
= workqueue_addnewthread(p
, wq
);
3486 * workqueue_addnewthread will drop and re-take the lock, so we
3487 * need to ensure we still have a cached request.
3489 * It also means we have to pick a new request, since our old pick may
3490 * not be valid anymore.
3493 if (req
&& (req
->tr_flags
& TR_FLAG_ONSTACK
)) {
3494 _threadreq_copy_prepare(wq
);
3498 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select
| DBG_FUNC_NONE
,
3501 } else if (_wq_exiting(wq
)) {
3502 rc
= WQ_RUN_TR_EXITING
;
3505 PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq
| DBG_FUNC_END
, wq
, 6, 0, 0, 0);
3507 * Something caused thread creation to fail. Kick off the timer in
3508 * the hope that it'll succeed next time.
3510 if (WQ_TIMER_DELAYED_NEEDED(wq
)) {
3511 workqueue_interval_timer_start(wq
);
3518 * Setup thread, mark request as complete and run with it.
3520 if (req
->tr_state
== TR_STATE_WAITING
) {
3521 _threadreq_dequeue(wq
, req
);
3523 if (tl
->th_priority
!= req
->tr_priority
) {
3524 PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority
| DBG_FUNC_NONE
,
3525 wq
, thread_tid(tl
->th_thread
),
3526 (tl
->th_priority
<< 16) | req
->tr_priority
, 1, 0);
3527 reset_priority(tl
, pthread_priority_from_wq_class_index(wq
, req
->tr_priority
));
3528 tl
->th_priority
= (uint8_t)req
->tr_priority
;
3530 if (req
->tr_flags
& TR_FLAG_OVERCOMMIT
) {
3531 if ((tl
->th_flags
& TH_LIST_CONSTRAINED
) != 0) {
3532 tl
->th_flags
&= ~TH_LIST_CONSTRAINED
;
3533 wq
->wq_constrained_threads_scheduled
--;
3536 if ((tl
->th_flags
& TH_LIST_CONSTRAINED
) == 0) {
3537 tl
->th_flags
|= TH_LIST_CONSTRAINED
;
3538 wq
->wq_constrained_threads_scheduled
++;
3542 if (!parking_tl
&& !(req
->tr_flags
& TR_FLAG_NO_PACING
)) {
3543 _wq_pacing_start(wq
, tl
);
3545 if ((req
->tr_flags
& TR_FLAG_OVERCOMMIT
) == 0) {
3546 uint32_t old_qos
, new_qos
;
3549 * If we are scheduling a constrained thread request, we may need to
3550 * update the best constrained qos in the thactive atomic state.
3552 for (new_qos
= 0; new_qos
< WQ_THACTIVE_NO_PENDING_REQUEST
; new_qos
++) {
3553 if (TAILQ_FIRST(&wq
->wq_reqlist
[new_qos
]))
3556 old_qos
= _wq_thactive_best_constrained_req_qos(wq
);
3557 if (old_qos
!= new_qos
) {
3558 wq_thactive_t v
= _wq_thactive_set_best_constrained_req_qos(wq
,
3561 PTHREAD_TRACE_WQ(TRACE_wq_thactive_update
, 2, (uint64_t)v
,
3562 (uint64_t)(v
>> 64), 0, 0);
3564 PTHREAD_TRACE_WQ(TRACE_wq_thactive_update
, 2, v
, 0, 0, 0);
3569 uint32_t upcall_flags
= WQ_FLAG_THREAD_NEWSPI
;
3570 if (req
->tr_flags
& TR_FLAG_OVERCOMMIT
)
3571 upcall_flags
|= WQ_FLAG_THREAD_OVERCOMMIT
;
3572 if (req
->tr_flags
& TR_FLAG_KEVENT
)
3573 upcall_flags
|= WQ_FLAG_THREAD_KEVENT
;
3574 if (req
->tr_flags
& TR_FLAG_WORKLOOP
)
3575 upcall_flags
|= WQ_FLAG_THREAD_WORKLOOP
| WQ_FLAG_THREAD_KEVENT
;
3576 if (tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
)
3577 upcall_flags
|= WQ_FLAG_THREAD_EVENT_MANAGER
;
3578 tl
->th_upcall_flags
= upcall_flags
>> WQ_FLAG_THREAD_PRIOSHIFT
;
3580 if (req
->tr_flags
& TR_FLAG_KEVENT
) {
3581 tl
->th_flags
|= TH_LIST_KEVENT
;
3583 tl
->th_flags
&= ~TH_LIST_KEVENT
;
3585 return _threadreq_complete_and_unlock(p
, wq
, req
, tl
);
3589 _threadreq_enqueue(wq
, incoming_req
);
3594 if (parking_tl
&& !(parking_tl
->th_flags
& TH_LIST_UNBINDING
)) {
3595 parkit(wq
, parking_tl
, parking_tl
->th_thread
);
3596 __builtin_unreachable();
3599 workqueue_unlock(wq
);
3605 * parked thread wakes up
3608 wq_unpark_continue(void* __unused ptr
, wait_result_t wait_result
)
3610 boolean_t first_use
= false;
3611 thread_t th
= current_thread();
3612 proc_t p
= current_proc();
3614 struct uthread
*uth
= pthread_kern
->get_bsdthread_info(th
);
3615 if (uth
== NULL
) goto done
;
3617 struct workqueue
*wq
= pthread_kern
->proc_get_wqptr(p
);
3618 if (wq
== NULL
) goto done
;
3620 workqueue_lock_spin(wq
);
3622 struct threadlist
*tl
= pthread_kern
->uthread_get_threadlist(uth
);
3623 assert(tl
!= WQ_THREADLIST_EXITING_POISON
);
3626 * We woke up before addnewthread() was finished setting us up. Go
3627 * ahead and exit, but before we do poison the threadlist variable so
3628 * that addnewthread() doesn't think we are valid still.
3630 pthread_kern
->uthread_set_threadlist(uth
, WQ_THREADLIST_EXITING_POISON
);
3631 workqueue_unlock(wq
);
3635 assert(tl
->th_flags
& TH_LIST_INITED
);
3637 if ((tl
->th_flags
& TH_LIST_NEW
)){
3638 tl
->th_flags
&= ~(TH_LIST_NEW
);
3642 if ((tl
->th_flags
& (TH_LIST_RUNNING
| TH_LIST_BUSY
)) == TH_LIST_RUNNING
) {
3644 * The normal wakeup path.
3646 goto return_to_user
;
3649 if ((tl
->th_flags
& TH_LIST_RUNNING
) == 0 &&
3650 wait_result
== THREAD_TIMED_OUT
&&
3651 tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
&&
3652 TAILQ_FIRST(&wq
->wq_thidlemgrlist
) == tl
&&
3653 TAILQ_NEXT(tl
, th_entry
) == NULL
){
3655 * If we are the only idle manager and we pop'ed for self-destruction,
3656 * then don't actually exit. Instead, free our stack to save some
3657 * memory and re-park.
3660 workqueue_unlock(wq
);
3662 vm_map_t vmap
= wq
->wq_map
;
3664 // Keep this in sync with _setup_wqthread()
3665 const vm_size_t guardsize
= vm_map_page_size(vmap
);
3666 const user_addr_t freeaddr
= (user_addr_t
)tl
->th_stackaddr
+ guardsize
;
3667 const vm_map_offset_t freesize
= vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE
+ guardsize
+ PTHREAD_T_OFFSET
) - 1, vm_map_page_mask(vmap
)) - guardsize
;
3670 kr
= mach_vm_behavior_set(vmap
, freeaddr
, freesize
, VM_BEHAVIOR_REUSABLE
);
3671 assert(kr
== KERN_SUCCESS
|| kr
== KERN_INVALID_ADDRESS
);
3673 workqueue_lock_spin(wq
);
3675 if ( !(tl
->th_flags
& TH_LIST_RUNNING
)) {
3676 thread_set_pending_block_hint(th
, kThreadWaitParkedWorkQueue
);
3677 assert_wait((caddr_t
)tl
, (THREAD_INTERRUPTIBLE
));
3679 workqueue_unlock(wq
);
3681 thread_block(wq_unpark_continue
);
3682 __builtin_unreachable();
3686 if ((tl
->th_flags
& TH_LIST_RUNNING
) == 0) {
3687 assert((tl
->th_flags
& TH_LIST_BUSY
) == 0);
3689 PTHREAD_TRACE_WQ(TRACE_wq_thread_park
| DBG_FUNC_END
, wq
, 0, 0, 0, 0);
3692 * We were set running, but not for the purposes of actually running.
3693 * This could be because the timer elapsed. Or it could be because the
3694 * thread aborted. Either way, we need to return to userspace to exit.
3696 * The call to workqueue_removethread will consume the lock.
3700 (tl
->th_priority
< qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS
) ||
3701 (tl
->th_priority
== WORKQUEUE_EVENT_MANAGER_BUCKET
))) {
3702 // Reset the QoS to something low for the pthread cleanup
3703 PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority
| DBG_FUNC_NONE
,
3705 (tl
->th_priority
<< 16) | qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS
), 3, 0);
3706 pthread_priority_t cleanup_pri
= _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS
, 0, 0);
3707 reset_priority(tl
, cleanup_pri
);
3710 workqueue_removethread(tl
, 0, first_use
);
3713 pthread_kern
->thread_bootstrap_return();
3715 pthread_kern
->unix_syscall_return(0);
3717 __builtin_unreachable();
3721 * The timer woke us up or the thread was aborted. However, we have
3722 * already started to make this a runnable thread. Wait for that to
3723 * finish, then continue to userspace.
3725 while ((tl
->th_flags
& TH_LIST_BUSY
)) {
3726 assert_wait((caddr_t
)tl
, (THREAD_UNINT
));
3728 workqueue_unlock(wq
);
3730 thread_block(THREAD_CONTINUE_NULL
);
3732 workqueue_lock_spin(wq
);
3737 PTHREAD_TRACE_WQ(TRACE_wq_thread_park
| DBG_FUNC_END
, wq
, 0, 0, 0, 0);
3739 if (_wq_pacing_end(wq
, tl
) && wq
->wq_reqcount
) {
3740 workqueue_run_threadreq_and_unlock(p
, wq
, NULL
, NULL
, true);
3742 workqueue_unlock(wq
);
3744 _setup_wqthread(p
, th
, wq
, tl
, first_use
? WQ_SETUP_FIRST_USE
: 0);
3745 pthread_kern
->thread_sched_call(th
, workqueue_callback
);
3748 pthread_kern
->thread_bootstrap_return();
3750 pthread_kern
->unix_syscall_return(EJUSTRETURN
);
3752 panic("Our attempt to return to userspace failed...");
3756 * configures initial thread stack/registers to jump into:
3757 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s. Those
3759 * routines setup a stack frame, using the current stack pointer, and marshall
3760 * arguments from registers to the stack as required by the ABI.
3762 * One odd thing we do here is to start the pthread_t 4k below what would be the
3763 * top of the stack otherwise. This is because usually only the first 4k of the
3764 * pthread_t will be used and so we want to put it on the same 16k page as the
3765 * top of the stack to save memory.
3767 * When we are done the stack will look like:
3768 * |-----------| th_stackaddr + th_allocsize
3769 * |pthread_t | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET
3770 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
3771 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
3772 * |stack gap | bottom aligned to 16 bytes, and at least as big as stack_gap_min
3776 * |guard page | guardsize
3777 * |-----------| th_stackaddr
void
_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
		struct threadlist *tl, int setup_flags)
{
	int error;

	if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) {
		/*
		 * For preemption reasons, we want to reset the voucher as late as
		 * possible, so we do it in two places:
		 *   - Just before parking (i.e. in parkit())
		 *   - Prior to doing the setup for the next workitem (i.e. here)
		 *
		 * Those two places are sufficient to ensure we always reset it before
		 * it goes back out to user space, but be careful not to break that
		 * invariant.
		 */
		__assert_only kern_return_t kr;
		kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
		assert(kr == KERN_SUCCESS);
	}

	uint32_t upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT;
	if (!(setup_flags & WQ_SETUP_FIRST_USE)) {
		upcall_flags |= WQ_FLAG_THREAD_REUSE;
	}

	/*
	 * Put the QoS class value into the lower bits of the reuse_thread register,
	 * which is where the thread priority used to be stored anyway.
	 */
	pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
	upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);

	const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
	const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN;
	const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN;

	user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET);
	user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min);
	user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize);
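
	/*
	 * stack_align_min is a power of two, so "& -stack_align_min" rounds the
	 * address down to that alignment; e.g. with a 16-byte alignment,
	 * (addr - stack_gap_min) & -16 clears the low four bits.  For 64-bit
	 * processes the gap is the ABI red zone; for 32-bit it is just the
	 * 16-byte call alignment.
	 */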

	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}

	if (setup_flags & WQ_SETUP_FIRST_USE) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}
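
		/*
		 * WQ_FLAG_THREAD_TSD_BASE_SET tells the pthread library that the
		 * kernel already pointed the thread's TSD base register at the
		 * pthread_t, so userspace does not need to set it up itself.
		 */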

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_map_t vmap = pthread_kern->current_map();
		if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
				vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))) {
			vm_fault(vmap,
					vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
					VM_PROT_READ | VM_PROT_WRITE,
					FALSE,
					THREAD_UNINT, NULL, 0);
		}
		vm_fault(vmap,
				vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)),
				VM_PROT_READ | VM_PROT_WRITE,
				FALSE,
				THREAD_UNINT, NULL, 0);
	}

	user_addr_t kevent_list = NULL;
	int kevent_count = 0;
	if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
		bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;

		kevent_list = pthread_self_addr - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
		kevent_count = WQ_KEVENT_LIST_LEN;

		user_addr_t kevent_id_addr = kevent_list;
		if (workloop) {
			/*
			 * The kevent ID goes just below the kevent list.  Sufficiently new
			 * userspace will know to look there.  Old userspace will just
			 * ignore it.
			 */
			kevent_id_addr -= sizeof(kqueue_id_t);
		}

		user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
		user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;

		int32_t events_out = 0;

		assert(tl->th_flags & TH_LIST_KEVENT_BOUND);
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
			flags |= KEVENT_FLAG_WORKQ_MANAGER;
		}

		int ret;
		if (workloop) {
			flags |= KEVENT_FLAG_WORKLOOP;
			kqueue_id_t kevent_id = -1;
			ret = kevent_id_internal(p, &kevent_id,
					NULL, 0, kevent_list, kevent_count,
					kevent_data_buf, &kevent_data_available,
					flags, &events_out);
			copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
		} else {
			flags |= KEVENT_FLAG_WORKQ;
			ret = kevent_qos_internal(p,
					class_index_get_thread_qos(tl->th_priority),
					NULL, 0, kevent_list, kevent_count,
					kevent_data_buf, &kevent_data_available,
					flags, &events_out);
		}

		// squash any errors into just empty output
		if (ret != KERN_SUCCESS || events_out == -1) {
			events_out = 0;
			kevent_data_available = WQ_KEVENT_DATA_SIZE;
		}

		// We shouldn't get data out if there aren't events available
		assert(events_out != 0 || kevent_data_available == WQ_KEVENT_DATA_SIZE);

		if (events_out > 0) {
			if (kevent_data_available == WQ_KEVENT_DATA_SIZE) {
				stack_top_addr = (kevent_id_addr - stack_gap_min) & -stack_align_min;
			} else {
				stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min;
			}

			kevent_count = events_out;
		} else {
			kevent_list = NULL;
			kevent_count = 0;
		}
	}
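
	/*
	 * When events (and possibly extra data) were delivered into the region
	 * carved out above, stack_top_addr is moved down below whatever was
	 * actually written so the new thread's stack cannot clobber it; with no
	 * events, the full stack area is handed back and kevent_list/kevent_count
	 * stay empty.
	 */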

	PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, 0, 0, 0, 0);

#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)pthread_self_addr,
			.ebx = /* arg1 */ (unsigned int)tl->th_thport,
			.ecx = /* arg2 */ (unsigned int)stack_bottom_addr,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)stack_top_addr),
		};

		error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)pthread_self_addr,
			.rsi = (uint64_t)tl->th_thport,
			.rdx = (uint64_t)stack_bottom_addr,
			.rcx = (uint64_t)kevent_list,
			.r8  = (uint64_t)upcall_flags,
			.r9  = (uint64_t)kevent_count,

			.rsp = (uint64_t)stack_top_addr,
		};

		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	}
#else
#error setup_wqthread not defined for this architecture
#endif
}
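
/*
 * Debug-only sysctl handler: userspace writes an array of
 * workq_reqthreads_req_s entries (at most 64) and they are fed straight into
 * _workq_reqthreads() to exercise the thread-request machinery; the OID
 * (sysctl__debug_wq_kevent_test) is registered in the init routine below.
 */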

#if DEBUG
static int wq_kevent_test SYSCTL_HANDLER_ARGS
{
	//(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
#pragma unused(oidp, arg1, arg2)
	int error;
	struct workq_reqthreads_req_s requests[64] = {};

	if (req->newlen > sizeof(requests) ||
			req->newlen < sizeof(struct workq_reqthreads_req_s)) {
		return EINVAL;
	}

	error = copyin(req->newptr, requests, req->newlen);
	if (error) {
		return error;
	}

	_workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests);

	return 0;
}
#endif // DEBUG

int
_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
{
	struct workqueue * wq;
	int error = 0;
	int activecount;

	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	/*
	 * This is sometimes called from interrupt context by the kperf sampler.
	 * In that case, it's not safe to spin trying to take the lock since we
	 * might already hold it.  So, we just try-lock it and error out if it's
	 * already held.  Since this is just a debugging aid, and all our callers
	 * are able to handle an error, that's fine.
	 */
	bool locked = workqueue_lock_try(wq);
	if (!locked) {
		return EBUSY;
	}

	activecount = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq),
			WORKQUEUE_NUM_BUCKETS - 1, NULL, NULL);
	pwqinfo->pwq_nthreads = wq->wq_nthreads;
	pwqinfo->pwq_runthreads = activecount;
	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
	pwqinfo->pwq_state = 0;

	if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_nthreads >= wq_max_threads) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

	workqueue_unlock(wq);
	return error;
}
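
/*
 * kdp (debugger/stackshot) variant of the state query: it must never block,
 * so instead of taking the workqueue lock it bails out with 0 whenever the
 * lock is already held.
 */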
uint32_t
_get_pwq_state_kdp(proc_t p)
{
	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);

	if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) {
		return 0;
	}

	uint32_t pwq_state = WQ_FLAGS_AVAILABLE;

	if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_nthreads >= wq_max_threads) {
		pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

	return pwq_state;
}

int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
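
/*
 * One-time initialization for the pthread kext: lock group/attribute and the
 * global list mutex, the psynch hash and cleanup thread call, the zones used
 * for workqueue/threadlist/threadreq allocations, the workqueue sysctls, and
 * the per-QoS-bucket concurrency table.
 */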
void
_pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();

	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);

	pthread_zone_workqueue = zinit(sizeof(struct workqueue),
			1024 * sizeof(struct workqueue), 8192, "pthread.workqueue");
	pthread_zone_threadlist = zinit(sizeof(struct threadlist),
			1024 * sizeof(struct threadlist), 8192, "pthread.threadlist");
	pthread_zone_threadreq = zinit(sizeof(struct threadreq),
			1024 * sizeof(struct threadreq), 8192, "pthread.threadreq");
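
	/*
	 * zinit(element_size, max_bytes, alloc_chunk_bytes, name): each zone is
	 * sized to hold up to 1024 elements and grows in 8 KB chunks.
	 */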

	sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_threads);
	sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
	sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);

#if DEBUG
	sysctl_register_oid(&sysctl__debug_wq_kevent_test);
#endif
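
	/*
	 * Seed the per-bucket concurrency table from the scheduler's idea of
	 * logical-core parallelism for each QoS, then cap the event manager
	 * bucket at a single thread since there is only ever one event manager.
	 */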
	for (int i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) {
		uint32_t thread_qos = _wq_bucket_to_thread_qos(i);
		wq_max_concurrency[i] = pthread_kern->qos_max_parallelism(thread_qos,
				QOS_PARALLELISM_COUNT_LOGICAL);
	}
	wq_max_concurrency[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
}