Commit | Line | Data |
---|---|---|
f1a1da6c A |
1 | /* |
2 | * Copyright (c) 2000-2012 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * pthread_synch.c | |
31 | */ | |
32 | ||
964d3577 A |
33 | #pragma mark - Front Matter |
34 | ||
f1a1da6c A |
35 | #define _PTHREAD_CONDATTR_T |
36 | #define _PTHREAD_COND_T | |
37 | #define _PTHREAD_MUTEXATTR_T | |
38 | #define _PTHREAD_MUTEX_T | |
39 | #define _PTHREAD_RWLOCKATTR_T | |
40 | #define _PTHREAD_RWLOCK_T | |
41 | ||
42 | #undef pthread_mutexattr_t | |
43 | #undef pthread_mutex_t | |
44 | #undef pthread_condattr_t | |
45 | #undef pthread_cond_t | |
46 | #undef pthread_rwlockattr_t | |
47 | #undef pthread_rwlock_t | |
48 | ||
2546420a A |
49 | #include <sys/cdefs.h> |
50 | ||
51 | // <rdar://problem/26158937> panic() should be marked noreturn | |
52 | extern void panic(const char *string, ...) __printflike(1,2) __dead2; | |
53 | ||
f1a1da6c A |
54 | #include <sys/param.h> |
55 | #include <sys/queue.h> | |
56 | #include <sys/resourcevar.h> | |
57 | //#include <sys/proc_internal.h> | |
58 | #include <sys/kauth.h> | |
59 | #include <sys/systm.h> | |
60 | #include <sys/timeb.h> | |
61 | #include <sys/times.h> | |
62 | #include <sys/acct.h> | |
63 | #include <sys/kernel.h> | |
64 | #include <sys/wait.h> | |
65 | #include <sys/signalvar.h> | |
66 | #include <sys/sysctl.h> | |
67 | #include <sys/syslog.h> | |
68 | #include <sys/stat.h> | |
69 | #include <sys/lock.h> | |
70 | #include <sys/kdebug.h> | |
71 | //#include <sys/sysproto.h> | |
72 | #include <sys/vm.h> | |
73 | #include <sys/user.h> /* for coredump */ | |
74 | #include <sys/proc_info.h> /* for fill_procworkqueue */ | |
75 | ||
f1a1da6c A |
76 | #include <mach/mach_port.h> |
77 | #include <mach/mach_types.h> | |
78 | #include <mach/semaphore.h> | |
79 | #include <mach/sync_policy.h> | |
80 | #include <mach/task.h> | |
81 | #include <mach/vm_prot.h> | |
82 | #include <kern/kern_types.h> | |
83 | #include <kern/task.h> | |
84 | #include <kern/clock.h> | |
85 | #include <mach/kern_return.h> | |
86 | #include <kern/thread.h> | |
87 | #include <kern/sched_prim.h> | |
88 | #include <kern/kalloc.h> | |
89 | #include <kern/sched_prim.h> /* for thread_exception_return */ | |
90 | #include <kern/processor.h> | |
91 | #include <kern/assert.h> | |
92 | #include <mach/mach_vm.h> | |
93 | #include <mach/mach_param.h> | |
94 | #include <mach/thread_status.h> | |
95 | #include <mach/thread_policy.h> | |
96 | #include <mach/message.h> | |
97 | #include <mach/port.h> | |
98 | //#include <vm/vm_protos.h> | |
99 | #include <vm/vm_fault.h> | |
100 | #include <vm/vm_map.h> | |
101 | #include <mach/thread_act.h> /* for thread_resume */ | |
102 | #include <machine/machine_routines.h> | |
964d3577 | 103 | #include <mach/shared_region.h> |
f1a1da6c A |
104 | |
105 | #include <libkern/OSAtomic.h> | |
2546420a | 106 | #include <libkern/libkern.h> |
f1a1da6c A |
107 | |
108 | #include <sys/pthread_shims.h> | |
109 | #include "kern_internal.h" | |
110 | ||
f1a1da6c A |
111 | // XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE |
112 | #define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP)) | |
113 | ||
2546420a A |
114 | // XXX: Ditto for thread tags from kern/thread.h |
115 | #define THREAD_TAG_MAINTHREAD 0x1 | |
116 | #define THREAD_TAG_PTHREAD 0x10 | |
117 | #define THREAD_TAG_WORKQUEUE 0x20 | |
118 | ||
f1a1da6c A |
119 | lck_grp_attr_t *pthread_lck_grp_attr; |
120 | lck_grp_t *pthread_lck_grp; | |
121 | lck_attr_t *pthread_lck_attr; | |
122 | ||
123 | extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64); | |
124 | extern void workqueue_thread_yielded(void); | |
125 | ||
2546420a A |
126 | enum run_nextreq_mode { |
127 | RUN_NEXTREQ_DEFAULT, | |
128 | RUN_NEXTREQ_DEFAULT_KEVENT, | |
129 | RUN_NEXTREQ_OVERCOMMIT, | |
130 | RUN_NEXTREQ_OVERCOMMIT_KEVENT, | |
131 | RUN_NEXTREQ_DEFERRED_OVERCOMMIT, | |
132 | RUN_NEXTREQ_UNCONSTRAINED, | |
133 | RUN_NEXTREQ_EVENT_MANAGER, | |
134 | RUN_NEXTREQ_ADD_TIMER | |
135 | }; | |
136 | static thread_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, | |
137 | enum run_nextreq_mode mode, pthread_priority_t prio, | |
138 | bool kevent_bind_via_return); | |
f1a1da6c A |
139 | |
140 | static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority); | |
141 | ||
2546420a A |
142 | static void wq_runreq(proc_t p, thread_t th, struct workqueue *wq, |
143 | struct threadlist *tl, boolean_t return_directly, boolean_t deferred_kevent); | |
f1a1da6c | 144 | |
2546420a | 145 | static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl, bool first_use); |
f1a1da6c | 146 | |
2546420a A |
147 | static void reset_priority(struct threadlist *tl, pthread_priority_t pri); |
148 | static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index); | |
149 | ||
150 | static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2; | |
f1a1da6c | 151 | |
964d3577 | 152 | static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t ignore_constrained_thread_limit); |
2546420a A |
153 | |
154 | static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use); | |
155 | static void workqueue_lock_spin(struct workqueue *); | |
156 | static void workqueue_unlock(struct workqueue *); | |
f1a1da6c | 157 | |
964d3577 A |
158 | static boolean_t may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, uint32_t my_priclass, boolean_t *start_timer); |
159 | ||
2546420a | 160 | static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap); |
964d3577 | 161 | |
f1a1da6c A |
162 | int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc); |
163 | int proc_setalltargetconc(pid_t pid, int32_t * targetconcp); | |
164 | ||
165 | #define WQ_MAXPRI_MIN 0 /* low prio queue num */ | |
166 | #define WQ_MAXPRI_MAX 2 /* max prio queuenum */ | |
167 | #define WQ_PRI_NUM 3 /* number of prio work queues */ | |
168 | ||
169 | #define C_32_STK_ALIGN 16 | |
170 | #define C_64_STK_ALIGN 16 | |
171 | #define C_64_REDZONE_LEN 128 | |
964d3577 A |
172 | |
173 | #define PTHREAD_T_OFFSET 0 | |
f1a1da6c A |
174 | |
175 | /* | |
176 | * Flags field passed to bsdthread_create and back in pthread_start |
177 | 31 <---------------------------------> 0 | |
178 | _________________________________________ | |
179 | | flags(8) | policy(8) | importance(16) | | |
180 | ----------------------------------------- | |
181 | */ | |
182 | ||
2546420a A |
183 | #define PTHREAD_START_CUSTOM 0x01000000 |
184 | #define PTHREAD_START_SETSCHED 0x02000000 | |
185 | #define PTHREAD_START_DETACHED 0x04000000 | |
186 | #define PTHREAD_START_QOSCLASS 0x08000000 | |
187 | #define PTHREAD_START_TSD_BASE_SET 0x10000000 | |
188 | #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff | |
f1a1da6c A |
189 | #define PTHREAD_START_POLICY_BITSHIFT 16 |
190 | #define PTHREAD_START_POLICY_MASK 0xff | |
191 | #define PTHREAD_START_IMPORTANCE_MASK 0xffff | |
192 | ||
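/*
 * Editorial sketch (not part of the original file): given the layout pictured
 * above, the PTHREAD_START_SETSCHED parameters are recovered from the flags
 * word the same way _bsdthread_create() does further down. The example_* name
 * is hypothetical.
 */
static inline void
example_decode_sched_flags(uint32_t flags, unsigned int *policy, unsigned int *importance)
{
	/* low 16 bits carry the importance value */
	*importance = flags & PTHREAD_START_IMPORTANCE_MASK;
	/* next 8 bits carry the scheduling policy (timeshare/FIFO/RR) */
	*policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
}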
193 | #define SCHED_OTHER POLICY_TIMESHARE | |
194 | #define SCHED_FIFO POLICY_FIFO | |
195 | #define SCHED_RR POLICY_RR | |
196 | ||
964d3577 A |
197 | #define BASEPRI_DEFAULT 31 |
198 | ||
2546420a A |
199 | #pragma mark sysctls |
200 | ||
201 | uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD; | |
202 | uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS; | |
203 | uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS; | |
204 | uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS; | |
205 | uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS; | |
206 | uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS; | |
207 | uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8; | |
208 | uint32_t wq_max_concurrency = 1; // set to ncpus on load | |
209 | ||
210 | SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, | |
211 | &wq_yielded_threshold, 0, ""); | |
212 | ||
213 | SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, | |
214 | &wq_yielded_window_usecs, 0, ""); | |
215 | ||
216 | SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, | |
217 | &wq_stalled_window_usecs, 0, ""); | |
218 | ||
219 | SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, | |
220 | &wq_reduce_pool_window_usecs, 0, ""); | |
221 | ||
222 | SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, | |
223 | &wq_max_timer_interval_usecs, 0, ""); | |
224 | ||
225 | SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED, | |
226 | &wq_max_threads, 0, ""); | |
227 | ||
228 | SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED, | |
229 | &wq_max_constrained_threads, 0, ""); | |
230 | ||
231 | #ifdef DEBUG | |
232 | SYSCTL_INT(_kern, OID_AUTO, wq_max_concurrency, CTLFLAG_RW | CTLFLAG_LOCKED, | |
233 | &wq_max_concurrency, 0, ""); | |
234 | ||
235 | static int wq_kevent_test SYSCTL_HANDLER_ARGS; | |
236 | SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-"); | |
237 | #endif | |
238 | ||
239 | static uint32_t wq_init_constrained_limit = 1; | |
240 | ||
241 | uint32_t pthread_debug_tracing = 1; | |
242 | ||
243 | SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED, | |
244 | &pthread_debug_tracing, 0, "") | |
245 | ||
246 | ||
964d3577 A |
247 | #pragma mark - Process/Thread Setup/Teardown syscalls |
248 | ||
2546420a A |
249 | static mach_vm_offset_t |
250 | stack_addr_hint(proc_t p, vm_map_t vmap) | |
251 | { | |
964d3577 | 252 | mach_vm_offset_t stackaddr; |
2546420a A |
253 | mach_vm_offset_t aslr_offset; |
254 | bool proc64bit = proc_is64bit(p); | |
255 | ||
256 | // We can't safely take random values % something unless it's a power of two |
257 | _Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two"); | |
258 | ||
964d3577 | 259 | #if defined(__i386__) || defined(__x86_64__) |
2546420a A |
260 | if (proc64bit) { |
261 | // Matches vm_map_get_max_aslr_slide_pages's image shift in xnu | |
262 | aslr_offset = random() % (1 << 28); // about 512 stacks | |
263 | } else { | |
264 | // Actually bigger than the image shift, we've got ~256MB to work with | |
265 | aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE); | |
266 | } | |
267 | aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap)); | |
268 | if (proc64bit) { | |
964d3577 | 269 | // Above nanomalloc range (see NANOZONE_SIGNATURE) |
2546420a | 270 | stackaddr = 0x700000000000 + aslr_offset; |
964d3577 | 271 | } else { |
2546420a | 272 | stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset; |
964d3577 A |
273 | } |
274 | #elif defined(__arm__) || defined(__arm64__) | |
2546420a A |
275 | // vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better |
276 | aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE); | |
277 | aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset, vm_map_page_mask(vmap)); | |
278 | if (proc64bit) { | |
964d3577 | 279 | // 64 stacks below nanomalloc (see NANOZONE_SIGNATURE) |
2546420a | 280 | stackaddr = 0x170000000 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset; |
964d3577 | 281 | } else { |
2546420a A |
282 | // If you try to slide down from this point, you risk ending up in memory consumed by malloc |
283 | stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset; | |
964d3577 A |
284 | } |
285 | #else | |
286 | #error Need to define a stack address hint for this architecture | |
287 | #endif | |
288 | return stackaddr; | |
289 | } | |
290 | ||
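/*
 * Worked example (editorial sketch, not part of the original file): the 64-bit
 * x86 arm of stack_addr_hint() above reduces to the following, i.e. a hint
 * somewhere in the ~256MB window starting at 0x700000000000, rounded down to a
 * page boundary. The example_* name is hypothetical.
 */
static inline mach_vm_offset_t
example_stack_addr_hint_x86_64(vm_map_t vmap)
{
	mach_vm_offset_t aslr_offset = random() % (1 << 28);
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	return 0x700000000000 + aslr_offset;	// above the nanomalloc range
}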
291 | /** | |
292 | * bsdthread_create system call. Used by pthread_create. | |
293 | */ | |
f1a1da6c A |
294 | int |
295 | _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval) | |
296 | { | |
297 | kern_return_t kret; | |
298 | void * sright; | |
299 | int error = 0; | |
300 | int allocated = 0; | |
301 | mach_vm_offset_t stackaddr; | |
302 | mach_vm_size_t th_allocsize = 0; | |
f1a1da6c | 303 | mach_vm_size_t th_guardsize; |
f1a1da6c A |
304 | mach_vm_offset_t th_stack; |
305 | mach_vm_offset_t th_pthread; | |
2546420a | 306 | mach_vm_offset_t th_tsd_base; |
f1a1da6c A |
307 | mach_port_name_t th_thport; |
308 | thread_t th; | |
309 | vm_map_t vmap = pthread_kern->current_map(); | |
310 | task_t ctask = current_task(); | |
311 | unsigned int policy, importance; | |
2546420a A |
312 | uint32_t tsd_offset; |
313 | ||
f1a1da6c A |
314 | int isLP64 = 0; |
315 | ||
316 | if (pthread_kern->proc_get_register(p) == 0) { | |
317 | return EINVAL; | |
318 | } | |
319 | ||
320 | PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0); | |
321 | ||
322 | isLP64 = proc_is64bit(p); | |
323 | th_guardsize = vm_map_page_size(vmap); | |
324 | ||
2546420a | 325 | stackaddr = pthread_kern->proc_get_stack_addr_hint(p); |
f1a1da6c A |
326 | kret = pthread_kern->thread_create(ctask, &th); |
327 | if (kret != KERN_SUCCESS) | |
328 | return(ENOMEM); | |
329 | thread_reference(th); | |
330 | ||
2546420a A |
331 | pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD); |
332 | ||
f1a1da6c A |
333 | sright = (void *)pthread_kern->convert_thread_to_port(th); |
334 | th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask)); | |
335 | ||
964d3577 A |
336 | if ((flags & PTHREAD_START_CUSTOM) == 0) { |
337 | mach_vm_size_t pthread_size = | |
338 | vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap)); | |
339 | th_allocsize = th_guardsize + user_stack + pthread_size; | |
340 | user_stack += PTHREAD_T_OFFSET; | |
341 | ||
2546420a A |
342 | kret = mach_vm_map(vmap, &stackaddr, |
343 | th_allocsize, | |
964d3577 A |
344 | page_size-1, |
345 | VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL, | |
346 | 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, | |
347 | VM_INHERIT_DEFAULT); | |
348 | if (kret != KERN_SUCCESS){ | |
2546420a | 349 | kret = mach_vm_allocate(vmap, |
964d3577 A |
350 | &stackaddr, th_allocsize, |
351 | VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); | |
352 | } | |
353 | if (kret != KERN_SUCCESS) { | |
f1a1da6c A |
354 | error = ENOMEM; |
355 | goto out; | |
964d3577 | 356 | } |
f1a1da6c A |
357 | |
358 | PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0); | |
359 | ||
f1a1da6c | 360 | allocated = 1; |
964d3577 | 361 | /* |
f1a1da6c | 362 | * The guard page is at the lowest address |
964d3577 | 363 | * The stack base is the highest address |
f1a1da6c A |
364 | */ |
365 | kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE); | |
366 | ||
964d3577 | 367 | if (kret != KERN_SUCCESS) { |
f1a1da6c A |
368 | error = ENOMEM; |
369 | goto out1; | |
964d3577 A |
370 | } |
371 | ||
372 | th_pthread = stackaddr + th_guardsize + user_stack; | |
373 | th_stack = th_pthread; | |
f1a1da6c | 374 | |
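/*
 * Editorial note (not in the original source): the resulting allocation is
 * laid out low-to-high as
 *
 *   stackaddr                                        stackaddr + th_allocsize
 *   [ guard page(s) ][ stack, grows downward ][ pthread_t + TSD ]
 *                                             ^
 *                                th_stack == th_pthread
 *
 * which matches the guard-page / stack-base comment above.
 */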
964d3577 | 375 | /* |
f1a1da6c A |
376 | * Pre-fault the first page of the new thread's stack and the page that will |
377 | * contain the pthread_t structure. | |
378 | */ | |
964d3577 A |
379 | if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) != |
380 | vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){ | |
381 | vm_fault( vmap, | |
382 | vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)), | |
383 | VM_PROT_READ | VM_PROT_WRITE, | |
384 | FALSE, | |
385 | THREAD_UNINT, NULL, 0); | |
386 | } | |
f1a1da6c A |
387 | |
388 | vm_fault( vmap, | |
964d3577 A |
389 | vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)), |
390 | VM_PROT_READ | VM_PROT_WRITE, | |
391 | FALSE, | |
392 | THREAD_UNINT, NULL, 0); | |
393 | ||
f1a1da6c A |
394 | } else { |
395 | th_stack = user_stack; | |
f1a1da6c A |
396 | th_pthread = user_pthread; |
397 | ||
398 | PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0); | |
399 | } | |
2546420a A |
400 | |
401 | tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); | |
402 | if (tsd_offset) { | |
403 | th_tsd_base = th_pthread + tsd_offset; | |
404 | kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base); | |
405 | if (kret == KERN_SUCCESS) { | |
406 | flags |= PTHREAD_START_TSD_BASE_SET; | |
407 | } | |
408 | } | |
409 | ||
f1a1da6c A |
410 | #if defined(__i386__) || defined(__x86_64__) |
411 | /* | |
412 | * Set up i386 registers & function call. | |
413 | */ | |
414 | if (isLP64 == 0) { | |
964d3577 A |
415 | x86_thread_state32_t state = { |
416 | .eip = (unsigned int)pthread_kern->proc_get_threadstart(p), | |
417 | .eax = (unsigned int)th_pthread, | |
418 | .ebx = (unsigned int)th_thport, | |
419 | .ecx = (unsigned int)user_func, | |
420 | .edx = (unsigned int)user_funcarg, | |
421 | .edi = (unsigned int)user_stack, | |
422 | .esi = (unsigned int)flags, | |
423 | /* | |
424 | * set stack pointer | |
425 | */ | |
426 | .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)) | |
427 | }; | |
f1a1da6c | 428 | |
964d3577 | 429 | error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); |
f1a1da6c A |
430 | if (error != KERN_SUCCESS) { |
431 | error = EINVAL; | |
432 | goto out; | |
433 | } | |
434 | } else { | |
964d3577 A |
435 | x86_thread_state64_t state64 = { |
436 | .rip = (uint64_t)pthread_kern->proc_get_threadstart(p), | |
437 | .rdi = (uint64_t)th_pthread, | |
438 | .rsi = (uint64_t)(th_thport), | |
439 | .rdx = (uint64_t)user_func, | |
440 | .rcx = (uint64_t)user_funcarg, | |
441 | .r8 = (uint64_t)user_stack, | |
442 | .r9 = (uint64_t)flags, | |
443 | /* | |
444 | * set stack pointer aligned to 16 byte boundary | |
445 | */ | |
446 | .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN) | |
447 | }; | |
f1a1da6c | 448 | |
964d3577 | 449 | error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); |
f1a1da6c A |
450 | if (error != KERN_SUCCESS) { |
451 | error = EINVAL; | |
452 | goto out; | |
453 | } | |
454 | ||
455 | } | |
456 | #elif defined(__arm__) | |
964d3577 A |
457 | arm_thread_state_t state = { |
458 | .pc = (int)pthread_kern->proc_get_threadstart(p), | |
459 | .r[0] = (unsigned int)th_pthread, | |
460 | .r[1] = (unsigned int)th_thport, | |
461 | .r[2] = (unsigned int)user_func, | |
462 | .r[3] = (unsigned int)user_funcarg, | |
463 | .r[4] = (unsigned int)user_stack, | |
464 | .r[5] = (unsigned int)flags, | |
465 | ||
466 | /* Set r7 & lr to 0 for better back tracing */ | |
467 | .r[7] = 0, | |
468 | .lr = 0, | |
469 | ||
470 | /* | |
471 | * set stack pointer | |
472 | */ | |
473 | .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)) | |
474 | }; | |
f1a1da6c | 475 | |
964d3577 | 476 | (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); |
f1a1da6c A |
477 | |
478 | #else | |
479 | #error bsdthread_create not defined for this architecture | |
480 | #endif | |
481 | ||
482 | if ((flags & PTHREAD_START_SETSCHED) != 0) { | |
483 | /* Set scheduling parameters if needed */ | |
484 | thread_extended_policy_data_t extinfo; | |
485 | thread_precedence_policy_data_t precedinfo; | |
486 | ||
487 | importance = (flags & PTHREAD_START_IMPORTANCE_MASK); | |
488 | policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK; | |
489 | ||
490 | if (policy == SCHED_OTHER) { | |
491 | extinfo.timeshare = 1; | |
492 | } else { | |
493 | extinfo.timeshare = 0; | |
494 | } | |
495 | ||
496 | thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT); | |
497 | ||
f1a1da6c A |
498 | precedinfo.importance = (importance - BASEPRI_DEFAULT); |
499 | thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); | |
500 | } else if ((flags & PTHREAD_START_QOSCLASS) != 0) { | |
501 | /* Set thread QoS class if requested. */ | |
502 | pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK); | |
503 | ||
504 | thread_qos_policy_data_t qos; | |
2546420a | 505 | qos.qos_tier = pthread_priority_get_thread_qos(priority); |
f1a1da6c A |
506 | qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : |
507 | _pthread_priority_get_relpri(priority); | |
508 | ||
509 | pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); | |
510 | } | |
511 | ||
512 | kret = pthread_kern->thread_resume(th); | |
513 | if (kret != KERN_SUCCESS) { | |
514 | error = EINVAL; | |
515 | goto out1; | |
516 | } | |
517 | thread_deallocate(th); /* drop the creator reference */ | |
518 | ||
519 | PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0); | |
520 | ||
964d3577 A |
521 | // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms |
522 | *retval = (user_addr_t)th_pthread; | |
f1a1da6c A |
523 | |
524 | return(0); | |
525 | ||
526 | out1: | |
527 | if (allocated != 0) { | |
964d3577 | 528 | (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); |
f1a1da6c A |
529 | } |
530 | out: | |
531 | (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport); | |
532 | (void)thread_terminate(th); | |
533 | (void)thread_deallocate(th); | |
534 | return(error); | |
535 | } | |
536 | ||
964d3577 A |
537 | /** |
538 | * bsdthread_terminate system call. Used by pthread_terminate. |
539 | */ | |
f1a1da6c A |
540 | int |
541 | _bsdthread_terminate(__unused struct proc *p, | |
542 | user_addr_t stackaddr, | |
543 | size_t size, | |
544 | uint32_t kthport, | |
545 | uint32_t sem, | |
546 | __unused int32_t *retval) | |
547 | { | |
548 | mach_vm_offset_t freeaddr; | |
549 | mach_vm_size_t freesize; | |
550 | kern_return_t kret; | |
2546420a | 551 | thread_t th = current_thread(); |
f1a1da6c A |
552 | |
553 | freeaddr = (mach_vm_offset_t)stackaddr; | |
554 | freesize = size; | |
555 | ||
556 | PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0); | |
557 | ||
558 | if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) { | |
2546420a A |
559 | if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){ |
560 | vm_map_t user_map = pthread_kern->current_map(); | |
561 | freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map)); | |
562 | kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE); | |
563 | assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS); | |
564 | kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE); | |
565 | assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS); | |
566 | } else { | |
567 | kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize); | |
568 | if (kret != KERN_SUCCESS) { | |
569 | PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0); | |
570 | return(EINVAL); | |
571 | } | |
f1a1da6c A |
572 | } |
573 | } | |
574 | ||
2546420a | 575 | (void) thread_terminate(th); |
f1a1da6c A |
576 | if (sem != MACH_PORT_NULL) { |
577 | kret = pthread_kern->semaphore_signal_internal_trap(sem); | |
578 | if (kret != KERN_SUCCESS) { | |
579 | PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0); | |
580 | return(EINVAL); | |
581 | } | |
582 | } | |
583 | ||
584 | if (kthport != MACH_PORT_NULL) { | |
585 | pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport); | |
586 | } | |
587 | ||
588 | PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0); | |
589 | ||
590 | pthread_kern->thread_exception_return(); | |
591 | panic("bsdthread_terminate: still running\n"); | |
592 | ||
593 | PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0); | |
594 | ||
595 | return(0); | |
596 | } | |
597 | ||
964d3577 A |
598 | /** |
599 | * bsdthread_register system call. Performs per-process setup. Responsible for | |
600 | * returning capability bits to userspace and receiving userspace function addresses. |
601 | */ | |
f1a1da6c A |
602 | int |
603 | _bsdthread_register(struct proc *p, | |
604 | user_addr_t threadstart, | |
605 | user_addr_t wqthread, | |
606 | int pthsize, | |
607 | user_addr_t pthread_init_data, | |
2546420a | 608 | user_addr_t pthread_init_data_size, |
f1a1da6c A |
609 | uint64_t dispatchqueue_offset, |
610 | int32_t *retval) | |
611 | { | |
2546420a A |
612 | /* We have to do this first so that it resets after fork */ |
613 | pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stack_addr_hint(p, pthread_kern->current_map())); | |
614 | ||
f1a1da6c A |
615 | /* prevent multiple registrations */ |
616 | if (pthread_kern->proc_get_register(p) != 0) { | |
617 | return(EINVAL); | |
618 | } | |
619 | /* syscall randomizer test can pass bogus values */ | |
620 | if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) { | |
621 | return(EINVAL); | |
622 | } | |
623 | pthread_kern->proc_set_threadstart(p, threadstart); | |
624 | pthread_kern->proc_set_wqthread(p, wqthread); | |
625 | pthread_kern->proc_set_pthsize(p, pthsize); | |
626 | pthread_kern->proc_set_register(p); | |
627 | ||
628 | /* if we have pthread_init_data, then we use that and target_concptr (which is an offset) to get data. */ |
629 | if (pthread_init_data != 0) { | |
630 | thread_qos_policy_data_t qos; | |
631 | ||
2546420a A |
632 | struct _pthread_registration_data data = {}; |
633 | size_t pthread_init_sz = MIN(sizeof(struct _pthread_registration_data), (size_t)pthread_init_data_size); | |
f1a1da6c A |
634 | |
635 | kern_return_t kr = copyin(pthread_init_data, &data, pthread_init_sz); | |
636 | if (kr != KERN_SUCCESS) { | |
637 | return EINVAL; | |
638 | } | |
639 | ||
640 | /* Incoming data from the data structure */ | |
641 | pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset); | |
2546420a A |
642 | if (data.version > offsetof(struct _pthread_registration_data, tsd_offset) |
643 | && data.tsd_offset < (uint32_t)pthsize) { | |
644 | pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset); | |
645 | } | |
f1a1da6c A |
646 | |
647 | /* Outgoing data that userspace expects as a reply */ | |
2546420a | 648 | data.version = sizeof(struct _pthread_registration_data); |
f1a1da6c A |
649 | if (pthread_kern->qos_main_thread_active()) { |
650 | mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT; | |
651 | boolean_t gd = FALSE; | |
652 | ||
653 | kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); | |
654 | if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) { | |
655 | /* An unspecified QoS means the kernel wants us to impose legacy QoS upon the thread. */ |
656 | qos.qos_tier = THREAD_QOS_LEGACY; | |
657 | qos.tier_importance = 0; | |
658 | ||
659 | kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); | |
660 | } | |
661 | ||
662 | if (kr == KERN_SUCCESS) { | |
2546420a | 663 | data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier); |
f1a1da6c A |
664 | } else { |
665 | data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); | |
666 | } | |
667 | } else { | |
668 | data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); | |
669 | } | |
670 | ||
671 | kr = copyout(&data, pthread_init_data, pthread_init_sz); | |
672 | if (kr != KERN_SUCCESS) { | |
673 | return EINVAL; | |
674 | } | |
675 | } else { | |
676 | pthread_kern->proc_set_dispatchqueue_offset(p, dispatchqueue_offset); | |
f1a1da6c A |
677 | } |
678 | ||
679 | /* return the supported feature set as the return value. */ | |
680 | *retval = PTHREAD_FEATURE_SUPPORTED; | |
681 | ||
682 | return(0); | |
683 | } | |
684 | ||
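/*
 * Editorial sketch (not part of the original file): the registration blob is
 * versioned by size. A field is treated as present only when the version
 * reported by userspace extends past the start of that field, which is why the
 * tsd_offset read above is gated on offsetof(). The EXAMPLE_* macro name is
 * hypothetical.
 */
#define EXAMPLE_REGISTRATION_FIELD_PRESENT(data, field) \
	((data)->version > offsetof(struct _pthread_registration_data, field))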
964d3577 A |
685 | #pragma mark - QoS Manipulation |
686 | ||
f1a1da6c A |
687 | int |
688 | _bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval) | |
689 | { | |
690 | kern_return_t kr; | |
691 | thread_t th; | |
692 | ||
693 | pthread_priority_t priority; | |
694 | ||
695 | /* Unused parameters must be zero. */ | |
696 | if (arg3 != 0) { | |
697 | return EINVAL; | |
698 | } | |
699 | ||
700 | /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */ | |
701 | if (proc_is64bit(p)) { | |
702 | uint64_t v; | |
703 | kr = copyin(tsd_priority_addr, &v, sizeof(v)); | |
704 | if (kr != KERN_SUCCESS) { | |
705 | return kr; | |
706 | } | |
707 | priority = (int)(v & 0xffffffff); | |
708 | } else { | |
709 | uint32_t v; | |
710 | kr = copyin(tsd_priority_addr, &v, sizeof(v)); | |
711 | if (kr != KERN_SUCCESS) { | |
712 | return kr; | |
713 | } | |
714 | priority = v; | |
715 | } | |
716 | ||
717 | if ((th = port_name_to_thread(kport)) == THREAD_NULL) { | |
718 | return ESRCH; | |
719 | } | |
720 | ||
721 | /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */ | |
722 | if (th != current_thread()) { | |
723 | thread_deallocate(th); | |
724 | return EPERM; | |
725 | } | |
726 | ||
727 | int rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval); | |
728 | ||
729 | /* Static param the thread: we just set QoS on it, so it's stuck in QoS land now. */ |
730 | /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details | |
731 | ||
732 | thread_deallocate(th); | |
733 | ||
734 | return rv; | |
735 | } | |
736 | ||
737 | static inline struct threadlist * | |
738 | util_get_thread_threadlist_entry(thread_t th) | |
739 | { | |
740 | struct uthread *uth = pthread_kern->get_bsdthread_info(th); | |
741 | if (uth) { | |
742 | struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); | |
743 | return tl; | |
744 | } | |
745 | return NULL; | |
746 | } | |
747 | ||
f1a1da6c A |
748 | int |
749 | _bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval) | |
750 | { | |
751 | thread_qos_policy_data_t qos; | |
752 | mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT; | |
753 | boolean_t gd = FALSE; | |
2546420a A |
754 | bool was_manager_thread = false; |
755 | thread_t th = current_thread(); | |
756 | struct workqueue *wq = NULL; | |
757 | struct threadlist *tl = NULL; | |
f1a1da6c A |
758 | |
759 | kern_return_t kr; | |
760 | int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0; | |
761 | ||
2546420a A |
762 | if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) { |
763 | tl = util_get_thread_threadlist_entry(th); | |
764 | if (tl) { | |
765 | wq = tl->th_workq; | |
766 | } else { | |
767 | goto qos; | |
768 | } | |
769 | ||
770 | workqueue_lock_spin(wq); | |
771 | if (tl->th_flags & TH_LIST_KEVENT_BOUND) { | |
772 | tl->th_flags &= ~TH_LIST_KEVENT_BOUND; | |
773 | unsigned int kevent_flags = KEVENT_FLAG_WORKQ; | |
774 | if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { | |
775 | kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER; | |
776 | } | |
777 | ||
778 | workqueue_unlock(wq); | |
779 | kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags); | |
780 | } else { | |
781 | workqueue_unlock(wq); | |
782 | } | |
783 | } | |
784 | ||
785 | qos: | |
f1a1da6c | 786 | if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) { |
2546420a | 787 | kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); |
f1a1da6c A |
788 | if (kr != KERN_SUCCESS) { |
789 | qos_rv = EINVAL; | |
790 | goto voucher; | |
791 | } | |
792 | ||
793 | /* If we have main-thread QoS then we don't allow a thread to come out of QOS_CLASS_UNSPECIFIED. */ | |
794 | if (pthread_kern->qos_main_thread_active() && qos.qos_tier == THREAD_QOS_UNSPECIFIED) { | |
795 | qos_rv = EPERM; | |
796 | goto voucher; | |
797 | } | |
798 | ||
799 | /* Get the work queue for tracing, also the threadlist for bucket manipulation. */ |
2546420a A |
800 | if (!tl) { |
801 | tl = util_get_thread_threadlist_entry(th); | |
802 | if (tl) wq = tl->th_workq; | |
f1a1da6c A |
803 | } |
804 | ||
2546420a | 805 | PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0); |
f1a1da6c | 806 | |
2546420a | 807 | qos.qos_tier = pthread_priority_get_thread_qos(priority); |
f1a1da6c A |
808 | qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority); |
809 | ||
2546420a | 810 | if (qos.qos_tier == QOS_CLASS_UNSPECIFIED) { |
f1a1da6c A |
811 | qos_rv = EINVAL; |
812 | goto voucher; | |
813 | } | |
814 | ||
815 | /* If we're a workqueue thread, the threadlist item priority needs adjusting, along with the bucket we were running in. */ |
816 | if (tl) { | |
2546420a A |
817 | workqueue_lock_spin(wq); |
818 | bool now_under_constrained_limit = false; | |
819 | ||
820 | assert(!(tl->th_flags & TH_LIST_KEVENT_BOUND)); | |
821 | ||
822 | kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance); | |
823 | assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED); | |
f1a1da6c A |
824 | |
825 | /* Fix up counters. */ | |
826 | uint8_t old_bucket = tl->th_priority; | |
827 | uint8_t new_bucket = pthread_priority_get_class_index(priority); | |
2546420a A |
828 | if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET) { |
829 | was_manager_thread = true; | |
830 | } | |
f1a1da6c A |
831 | |
832 | uint32_t old_active = OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]); | |
833 | OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]); | |
834 | ||
835 | wq->wq_thscheduled_count[old_bucket]--; | |
836 | wq->wq_thscheduled_count[new_bucket]++; | |
837 | ||
2546420a A |
838 | bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED); |
839 | bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; | |
840 | if (!old_overcommit && new_overcommit) { | |
841 | wq->wq_constrained_threads_scheduled--; | |
842 | tl->th_flags &= ~TH_LIST_CONSTRAINED; | |
843 | if (wq->wq_constrained_threads_scheduled == wq_max_constrained_threads - 1) { | |
844 | now_under_constrained_limit = true; | |
845 | } | |
846 | } else if (old_overcommit && !new_overcommit) { | |
847 | wq->wq_constrained_threads_scheduled++; | |
848 | tl->th_flags |= TH_LIST_CONSTRAINED; | |
849 | } | |
850 | ||
f1a1da6c A |
851 | tl->th_priority = new_bucket; |
852 | ||
2546420a A |
853 | /* If we were at the ceiling of threads for a given bucket, we have |
854 | * to reevaluate whether we should start more work. | |
f1a1da6c | 855 | */ |
2546420a | 856 | if (old_active == wq->wq_reqconc[old_bucket] || now_under_constrained_limit) { |
f1a1da6c | 857 | /* workqueue_run_nextreq will drop the workqueue lock in all exit paths. */ |
2546420a | 858 | (void)workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_DEFAULT, 0, false); |
f1a1da6c | 859 | } else { |
2546420a A |
860 | workqueue_unlock(wq); |
861 | } | |
862 | } else { | |
863 | kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); | |
864 | if (kr != KERN_SUCCESS) { | |
865 | qos_rv = EINVAL; | |
f1a1da6c A |
866 | } |
867 | } | |
868 | ||
2546420a | 869 | PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0); |
f1a1da6c A |
870 | } |
871 | ||
872 | voucher: | |
873 | if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) { | |
874 | kr = pthread_kern->thread_set_voucher_name(voucher); | |
875 | if (kr != KERN_SUCCESS) { | |
876 | voucher_rv = ENOENT; | |
877 | goto fixedpri; | |
878 | } | |
879 | } | |
880 | ||
881 | fixedpri: | |
2546420a | 882 | if (qos_rv) goto done; |
f1a1da6c | 883 | if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) { |
964d3577 | 884 | thread_extended_policy_data_t extpol = {.timeshare = 0}; |
2546420a A |
885 | |
886 | if (!tl) tl = util_get_thread_threadlist_entry(th); | |
964d3577 A |
887 | if (tl) { |
888 | /* Not allowed on workqueue threads */ | |
889 | fixedpri_rv = ENOTSUP; | |
890 | goto done; | |
891 | } | |
892 | ||
2546420a | 893 | kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); |
964d3577 A |
894 | if (kr != KERN_SUCCESS) { |
895 | fixedpri_rv = EINVAL; | |
896 | goto done; | |
897 | } | |
898 | } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) { | |
899 | thread_extended_policy_data_t extpol = {.timeshare = 1}; | |
2546420a A |
900 | |
901 | if (!tl) tl = util_get_thread_threadlist_entry(th); | |
f1a1da6c | 902 | if (tl) { |
964d3577 | 903 | /* Not allowed on workqueue threads */ |
f1a1da6c A |
904 | fixedpri_rv = ENOTSUP; |
905 | goto done; | |
906 | } | |
907 | ||
2546420a | 908 | kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); |
f1a1da6c A |
909 | if (kr != KERN_SUCCESS) { |
910 | fixedpri_rv = EINVAL; | |
911 | goto done; | |
912 | } | |
913 | } | |
2546420a | 914 | |
f1a1da6c A |
915 | done: |
916 | if (qos_rv && voucher_rv) { | |
917 | /* Both failed, give that a unique error. */ | |
918 | return EBADMSG; | |
919 | } | |
920 | ||
921 | if (qos_rv) { | |
922 | return qos_rv; | |
923 | } | |
924 | ||
925 | if (voucher_rv) { | |
926 | return voucher_rv; | |
927 | } | |
928 | ||
929 | if (fixedpri_rv) { | |
930 | return fixedpri_rv; | |
931 | } | |
932 | ||
933 | return 0; | |
934 | } | |
935 | ||
936 | int | |
215aeb03 | 937 | _bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval) |
f1a1da6c A |
938 | { |
939 | thread_t th; | |
940 | int rv = 0; | |
941 | ||
f1a1da6c A |
942 | if ((th = port_name_to_thread(kport)) == THREAD_NULL) { |
943 | return ESRCH; | |
944 | } | |
945 | ||
2546420a | 946 | int override_qos = pthread_priority_get_thread_qos(priority); |
f1a1da6c A |
947 | |
948 | struct threadlist *tl = util_get_thread_threadlist_entry(th); | |
949 | if (tl) { | |
2546420a | 950 | PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0); |
f1a1da6c A |
951 | } |
952 | ||
953 | /* The only failure case here is if we pass a tid and have it look up the thread; since we pass the uthread, this always succeeds. */ |
2546420a A |
954 | pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE, |
955 | resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL); | |
f1a1da6c A |
956 | thread_deallocate(th); |
957 | return rv; | |
958 | } | |
959 | ||
960 | int | |
215aeb03 | 961 | _bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval) |
f1a1da6c A |
962 | { |
963 | thread_t th; | |
964 | int rv = 0; | |
965 | ||
215aeb03 | 966 | if (arg3 != 0) { |
f1a1da6c A |
967 | return EINVAL; |
968 | } | |
969 | ||
970 | if ((th = port_name_to_thread(kport)) == THREAD_NULL) { | |
971 | return ESRCH; | |
972 | } | |
973 | ||
974 | struct uthread *uth = pthread_kern->get_bsdthread_info(th); | |
975 | ||
976 | struct threadlist *tl = util_get_thread_threadlist_entry(th); | |
977 | if (tl) { | |
2546420a | 978 | PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0); |
f1a1da6c A |
979 | } |
980 | ||
215aeb03 | 981 | pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE); |
f1a1da6c A |
982 | |
983 | thread_deallocate(th); | |
984 | return rv; | |
985 | } | |
986 | ||
2546420a A |
987 | static int |
988 | _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr) | |
215aeb03 A |
989 | { |
990 | thread_t th; | |
991 | int rv = 0; | |
992 | ||
f1a1da6c A |
993 | if ((th = port_name_to_thread(kport)) == THREAD_NULL) { |
994 | return ESRCH; | |
995 | } | |
996 | ||
2546420a | 997 | int override_qos = pthread_priority_get_thread_qos(priority); |
f1a1da6c A |
998 | |
999 | struct threadlist *tl = util_get_thread_threadlist_entry(th); | |
1000 | if (!tl) { | |
1001 | thread_deallocate(th); | |
1002 | return EPERM; | |
1003 | } | |
1004 | ||
2546420a | 1005 | PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0); |
f1a1da6c | 1006 | |
2546420a A |
1007 | rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE, |
1008 | resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport); | |
f1a1da6c A |
1009 | |
1010 | thread_deallocate(th); | |
1011 | return rv; | |
1012 | } | |
1013 | ||
2546420a A |
1014 | int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd, |
1015 | mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval) | |
1016 | { | |
1017 | return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL); | |
1018 | } | |
1019 | ||
1020 | int | |
1021 | _bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval) | |
1022 | { | |
1023 | return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr); | |
1024 | } | |
1025 | ||
f1a1da6c | 1026 | int |
215aeb03 A |
1027 | _bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval) |
1028 | { | |
1029 | if (arg1 != 0 || arg2 != 0 || arg3 != 0) { | |
1030 | return EINVAL; | |
1031 | } | |
1032 | ||
1033 | return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval); | |
1034 | } | |
1035 | ||
1036 | int | |
1037 | _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval) | |
f1a1da6c | 1038 | { |
215aeb03 | 1039 | if ((reset_all && (resource != 0)) || arg3 != 0) { |
f1a1da6c A |
1040 | return EINVAL; |
1041 | } | |
1042 | ||
2546420a A |
1043 | thread_t th = current_thread(); |
1044 | struct uthread *uth = pthread_kern->get_bsdthread_info(th); | |
1045 | struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); | |
f1a1da6c | 1046 | |
2546420a A |
1047 | if (!tl) { |
1048 | return EPERM; | |
f1a1da6c A |
1049 | } |
1050 | ||
2546420a A |
1051 | PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0); |
1052 | ||
1053 | resource = reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource; | |
1054 | pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE); | |
1055 | ||
1056 | return 0; | |
f1a1da6c A |
1057 | } |
1058 | ||
1059 | int | |
1060 | _bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval) | |
1061 | { | |
1062 | switch (cmd) { | |
2546420a A |
1063 | case BSDTHREAD_CTL_SET_QOS: |
1064 | return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval); | |
1065 | case BSDTHREAD_CTL_QOS_OVERRIDE_START: | |
1066 | return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); | |
1067 | case BSDTHREAD_CTL_QOS_OVERRIDE_END: | |
1068 | return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval); | |
1069 | case BSDTHREAD_CTL_QOS_OVERRIDE_RESET: | |
1070 | return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval); | |
1071 | case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH: | |
1072 | return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); | |
1073 | case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD: | |
1074 | return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); | |
1075 | case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET: | |
1076 | return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval); | |
1077 | case BSDTHREAD_CTL_SET_SELF: | |
1078 | return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval); | |
1079 | default: | |
1080 | return EINVAL; | |
f1a1da6c A |
1081 | } |
1082 | } | |
1083 | ||
964d3577 | 1084 | #pragma mark - Workqueue Implementation |
964d3577 | 1085 | #pragma mark workqueue lock |
f1a1da6c | 1086 | |
2546420a A |
1087 | static boolean_t workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq) { |
1088 | return kdp_lck_spin_is_acquired(&wq->wq_lock); | |
f1a1da6c A |
1089 | } |
1090 | ||
f1a1da6c | 1091 | static void |
2546420a | 1092 | workqueue_lock_spin(struct workqueue *wq) |
f1a1da6c | 1093 | { |
2546420a A |
1094 | boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE); |
1095 | lck_spin_lock(&wq->wq_lock); | |
1096 | wq->wq_interrupt_state = interrupt_state; | |
f1a1da6c A |
1097 | } |
1098 | ||
1099 | static void | |
2546420a | 1100 | workqueue_unlock(struct workqueue *wq) |
f1a1da6c | 1101 | { |
2546420a A |
1102 | boolean_t interrupt_state = wq->wq_interrupt_state; |
1103 | lck_spin_unlock(&wq->wq_lock); | |
1104 | ml_set_interrupts_enabled(interrupt_state); | |
f1a1da6c A |
1105 | } |
1106 | ||
964d3577 | 1107 | #pragma mark workqueue add timer |
f1a1da6c | 1108 | |
964d3577 A |
1109 | /** |
1110 | * Sets up the timer which will call out to workqueue_add_timer | |
1111 | */ | |
f1a1da6c A |
1112 | static void |
1113 | workqueue_interval_timer_start(struct workqueue *wq) | |
1114 | { | |
1115 | uint64_t deadline; | |
1116 | ||
964d3577 A |
1117 | /* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the |
1118 | ATIMER_RUNNING flag is not present. The net effect here is that if a | |
1119 | sequence of threads is required, we'll double the time before we give out | |
1120 | the next one. */ | |
f1a1da6c A |
1121 | if (wq->wq_timer_interval == 0) { |
1122 | wq->wq_timer_interval = wq_stalled_window_usecs; | |
1123 | ||
1124 | } else { | |
1125 | wq->wq_timer_interval = wq->wq_timer_interval * 2; | |
1126 | ||
1127 | if (wq->wq_timer_interval > wq_max_timer_interval_usecs) { | |
1128 | wq->wq_timer_interval = wq_max_timer_interval_usecs; | |
1129 | } | |
1130 | } | |
1131 | clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline); | |
1132 | ||
2546420a A |
1133 | PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0); |
1134 | ||
1135 | boolean_t ret = thread_call_enter1_delayed(wq->wq_atimer_delayed_call, wq->wq_atimer_delayed_call, deadline); | |
1136 | if (ret) { | |
1137 | panic("delayed_call was already enqueued"); | |
1138 | } | |
1139 | } | |
1140 | ||
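/*
 * Example of the backoff above (editorial sketch, not part of the original
 * file): with the interval reset to 0 by workqueue_add_timer, successive
 * re-arms produce wq_stalled_window_usecs, then 2x, 4x, ... capped at
 * wq_max_timer_interval_usecs. The example_* name is hypothetical.
 */
static inline uint32_t
example_next_timer_interval_usecs(uint32_t current_interval)
{
	uint32_t next = (current_interval == 0) ?
	    wq_stalled_window_usecs : current_interval * 2;
	return (next > wq_max_timer_interval_usecs) ?
	    wq_max_timer_interval_usecs : next;
}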
1141 | /** | |
1142 | * Immediately trigger the workqueue_add_timer | |
1143 | */ | |
1144 | static void | |
1145 | workqueue_interval_timer_trigger(struct workqueue *wq) | |
1146 | { | |
1147 | PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, 0, 0); | |
f1a1da6c | 1148 | |
2546420a A |
1149 | boolean_t ret = thread_call_enter1(wq->wq_atimer_immediate_call, wq->wq_atimer_immediate_call); |
1150 | if (ret) { | |
1151 | panic("immediate_call was already enqueued"); | |
1152 | } | |
f1a1da6c A |
1153 | } |
1154 | ||
964d3577 A |
1155 | /** |
1156 | * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts | |
1157 | */ | |
f1a1da6c A |
1158 | static boolean_t |
1159 | wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp) | |
1160 | { | |
1161 | clock_sec_t secs; | |
1162 | clock_usec_t usecs; | |
1163 | uint64_t lastblocked_ts; | |
1164 | uint64_t elapsed; | |
1165 | ||
1166 | /* | |
1167 | * the timestamp is updated atomically w/o holding the workqueue lock | |
1168 | * so we need to do an atomic read of the 64 bits so that we don't see | |
1169 | * a mismatched pair of 32 bit reads... we accomplish this in an architecturally | |
1170 | * independent fashion by using OSCompareAndSwap64 to write back the | |
1171 | * value we grabbed... if it succeeds, then we have a good timestamp to | |
1172 | * evaluate... if it fails, we straddled grabbing the timestamp while it | |
1173 | * was being updated... treat a failed update as a busy thread since | |
1174 | * it implies we are about to see a really fresh timestamp anyway | |
1175 | */ | |
1176 | lastblocked_ts = *lastblocked_tsp; | |
1177 | ||
1178 | if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp)) | |
1179 | return (TRUE); | |
1180 | ||
1181 | if (lastblocked_ts >= cur_ts) { | |
1182 | /* | |
1183 | * because the update of the timestamp when a thread blocks isn't | |
1184 | * serialized against us looking at it (i.e. we don't hold the workq lock) | |
1185 | * it's possible to have a timestamp that matches the current time or | |
1186 | * that even looks to be in the future relative to when we grabbed the current | |
1187 | * time... just treat this as a busy thread since it must have just blocked. | |
1188 | */ | |
1189 | return (TRUE); | |
1190 | } | |
1191 | elapsed = cur_ts - lastblocked_ts; | |
1192 | ||
1193 | pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs); | |
1194 | ||
1195 | if (secs == 0 && usecs < wq_stalled_window_usecs) | |
1196 | return (TRUE); | |
1197 | return (FALSE); | |
1198 | } | |
1199 | ||
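/*
 * Editorial sketch (not part of the original file) of the torn-read guard used
 * above: a plain 64-bit load may be split on 32-bit hardware, so the value is
 * confirmed with a compare-and-swap of itself; a failed swap means a writer
 * raced us and the caller simply treats the thread as busy. The example_* name
 * is hypothetical.
 */
static inline boolean_t
example_read_timestamp_atomically(uint64_t *tsp, uint64_t *out)
{
	uint64_t ts = *tsp;
	if (!OSCompareAndSwap64((UInt64)ts, (UInt64)ts, tsp)) {
		return FALSE;	/* raced with an update; value may be torn */
	}
	*out = ts;
	return TRUE;
}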
2546420a A |
1200 | static inline bool |
1201 | WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq) | |
1202 | { | |
1203 | int oldflags; | |
1204 | retry: | |
1205 | oldflags = wq->wq_flags; | |
1206 | if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING))) { | |
1207 | if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_DELAYED_RUNNING, (UInt32 *)&wq->wq_flags)) { | |
1208 | return true; | |
1209 | } else { | |
1210 | goto retry; | |
1211 | } | |
1212 | } | |
1213 | return false; | |
1214 | } | |
1215 | ||
1216 | static inline bool | |
1217 | WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq) | |
1218 | { | |
1219 | int oldflags; | |
1220 | retry: | |
1221 | oldflags = wq->wq_flags; | |
1222 | if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING))) { | |
1223 | if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_IMMEDIATE_RUNNING, (UInt32 *)&wq->wq_flags)) { | |
1224 | return true; | |
1225 | } else { | |
1226 | goto retry; | |
1227 | } | |
1228 | } | |
1229 | return false; | |
1230 | } | |
f1a1da6c | 1231 | |
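/*
 * Usage sketch (editorial, not part of the original file): callers that notice
 * pending work pair the CAS helpers above with the matching thread_call, e.g.
 *
 *	if (WQ_TIMER_DELAYED_NEEDED(wq)) {
 *		workqueue_interval_timer_start(wq);
 *	}
 *
 * so each RUNNING flag is set at most once per outstanding thread_call; the
 * flag is cleared again inside workqueue_add_timer() once re-entry is safe.
 */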
964d3577 A |
1232 | /** |
1233 | * handler function for the timer | |
1234 | */ | |
f1a1da6c | 1235 | static void |
2546420a | 1236 | workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self) |
f1a1da6c A |
1237 | { |
1238 | proc_t p; | |
1239 | boolean_t start_timer = FALSE; | |
1240 | boolean_t retval; | |
964d3577 | 1241 | |
2546420a | 1242 | PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0); |
f1a1da6c A |
1243 | |
1244 | p = wq->wq_proc; | |
1245 | ||
2546420a | 1246 | workqueue_lock_spin(wq); |
f1a1da6c A |
1247 | |
1248 | /* | |
2546420a | 1249 | * There are two tricky issues here. |
f1a1da6c | 1250 | * |
2546420a A |
1251 | * First issue: we start the thread_call's that invoke this routine without |
1252 | * the workqueue lock held. The scheduler callback needs to trigger | |
1253 | * reevaluation of the number of running threads but shouldn't take that | |
1254 | * lock, so we can't use it to synchronize state around the thread_call. | |
1255 | * As a result, it might re-enter the thread_call while this routine is | |
1256 | * already running. This could cause it to fire a second time and we'll | |
1257 | * have two add_timers running at once. Obviously, we don't want that to | |
1258 | * keep stacking, so we need to keep it at two timers. | |
1259 | * | |
1260 | * Solution: use wq_flags (accessed via atomic CAS) to synchronize the | |
1261 | * enqueue of the thread_call itself. When a thread needs to trigger the | |
1262 | * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets | |
1263 | * the flag then does a thread_call_enter. We'll then remove that flag | |
1264 | * only once we've got the lock and it's safe for the thread_call to be | |
1265 | * entered again. | |
1266 | * | |
1267 | * Second issue: we need to make sure that the two timers don't execute this | |
1268 | * routine concurrently. We can't use the workqueue lock for this because | |
1269 | * we'll need to drop it during our execution. | |
1270 | * | |
1271 | * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that | |
1272 | * we are currently executing the routine and the next thread should wait. | |
1273 | * | |
1274 | * After all that, we arrive at the following four possible states: | |
1275 | * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY no pending timer, no active timer | |
1276 | * !WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY no pending timer, 1 active timer | |
1277 | * WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY 1 pending timer, no active timer | |
1278 | * WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY 1 pending timer, 1 active timer | |
1279 | * | |
1280 | * A further complication: sometimes we need to trigger this function to run |
1281 | * without delay. Because we aren't under a lock between setting | |
1282 | * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply | |
1283 | * re-enter the thread call: if thread_call_enter() returned false, we | |
1284 | * wouldn't be able to distinguish the case where the thread_call had | |
1285 | * already fired from the case where it hadn't been entered yet from the | |
1286 | * other thread. So, we use a separate thread_call for immediate | |
1287 | * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING. | |
f1a1da6c | 1288 | */ |
2546420a | 1289 | |
f1a1da6c A |
1290 | while (wq->wq_lflags & WQL_ATIMER_BUSY) { |
1291 | wq->wq_lflags |= WQL_ATIMER_WAITING; | |
1292 | ||
1293 | assert_wait((caddr_t)wq, (THREAD_UNINT)); | |
2546420a | 1294 | workqueue_unlock(wq); |
f1a1da6c A |
1295 | |
1296 | thread_block(THREAD_CONTINUE_NULL); | |
1297 | ||
2546420a | 1298 | workqueue_lock_spin(wq); |
f1a1da6c A |
1299 | } |
1300 | wq->wq_lflags |= WQL_ATIMER_BUSY; | |
1301 | ||
1302 | /* | |
2546420a | 1303 | * Decide which timer we are and remove the RUNNING flag. |
f1a1da6c | 1304 | */ |
2546420a A |
1305 | if (thread_call_self == wq->wq_atimer_delayed_call) { |
1306 | if ((wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) { | |
1307 | panic("workqueue_add_timer is the delayed timer but the delayed running flag isn't set"); | |
1308 | } | |
1309 | WQ_UNSETFLAG(wq, WQ_ATIMER_DELAYED_RUNNING); | |
1310 | } else if (thread_call_self == wq->wq_atimer_immediate_call) { | |
1311 | if ((wq->wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) { | |
1312 | panic("workqueue_add_timer is the immediate timer but the immediate running flag isn't set"); | |
1313 | } | |
1314 | WQ_UNSETFLAG(wq, WQ_ATIMER_IMMEDIATE_RUNNING); | |
1315 | } else { | |
1316 | panic("workqueue_add_timer can't figure out which timer it is"); | |
1317 | } | |
f1a1da6c A |
1318 | |
1319 | again: | |
1320 | retval = TRUE; | |
f1a1da6c | 1321 | if ( !(wq->wq_flags & WQ_EXITING)) { |
964d3577 | 1322 | boolean_t add_thread = FALSE; |
f1a1da6c A |
1323 | /* |
1324 | * check to see if the stall frequency was beyond our tolerance | |
2546420a | 1325 | * or we have work on the queue, but haven't scheduled any |
f1a1da6c A |
1326 | * new work within our acceptable time interval because |
1327 | * there were no idle threads left to schedule | |
1328 | */ | |
1329 | if (wq->wq_reqcount) { | |
964d3577 A |
1330 | uint32_t priclass = 0; |
1331 | uint32_t thactive_count = 0; | |
1332 | uint64_t curtime = mach_absolute_time(); | |
1333 | uint64_t busycount = 0; | |
1334 | ||
1335 | if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] && | |
1336 | wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){ | |
1337 | priclass = WORKQUEUE_EVENT_MANAGER_BUCKET; | |
1338 | } else { | |
1339 | for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) { | |
1340 | if (wq->wq_requests[priclass]) | |
1341 | break; | |
1342 | } | |
f1a1da6c | 1343 | } |
f1a1da6c | 1344 | |
964d3577 A |
1345 | if (priclass < WORKQUEUE_EVENT_MANAGER_BUCKET){ |
1346 | /* | |
1347 | * Compute a metric for how many threads are active. We |
1348 | * find the highest priority request outstanding and then add up | |
1349 | * the number of active threads in that and all higher-priority | |
1350 | * buckets. We'll also add any "busy" threads which are not | |
1351 | * active but blocked recently enough that we can't be sure | |
1352 | * they've gone idle yet. We'll then compare this metric to our | |
1353 | * max concurrency to decide whether to add a new thread. | |
1354 | */ | |
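				/*
				 * In other words (for a non-event-manager request), a new
				 * thread is considered only while
				 *	thactive_count + busycount < wq->wq_max_concurrency,
				 * with both counts accumulated over the chosen bucket and all
				 * higher-priority buckets; that is the check performed below.
				 */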
1355 | for (uint32_t i = 0; i <= priclass; i++) { | |
1356 | thactive_count += wq->wq_thactive_count[i]; | |
f1a1da6c | 1357 | |
2546420a | 1358 | if (wq->wq_thscheduled_count[i] < wq->wq_thactive_count[i]) { |
964d3577 A |
1359 | if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) |
1360 | busycount++; | |
1361 | } | |
f1a1da6c A |
1362 | } |
1363 | } | |
964d3577 A |
1364 | |
1365 | if (thactive_count + busycount < wq->wq_max_concurrency || | |
1366 | priclass == WORKQUEUE_EVENT_MANAGER_BUCKET) { | |
f1a1da6c A |
1367 | |
1368 | if (wq->wq_thidlecount == 0) { | |
1369 | /* | |
1370 | * if we have no idle threads, try to add one | |
1371 | */ | |
964d3577 | 1372 | retval = workqueue_addnewthread(wq, priclass == WORKQUEUE_EVENT_MANAGER_BUCKET); |
f1a1da6c A |
1373 | } |
1374 | add_thread = TRUE; | |
1375 | } | |
1376 | ||
1377 | if (wq->wq_reqcount) { | |
1378 | /* | |
1379 | * as long as we have threads to schedule, and we successfully | |
1380 | * scheduled new work, keep trying | |
1381 | */ | |
1382 | while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) { | |
1383 | /* | |
1384 | * workqueue_run_nextreq is responsible for | |
1385 | * dropping the workqueue lock in all cases | |
1386 | */ | |
2546420a A |
1387 | retval = (workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_ADD_TIMER, 0, false) != THREAD_NULL); |
1388 | workqueue_lock_spin(wq); | |
f1a1da6c A |
1389 | |
1390 | if (retval == FALSE) | |
1391 | break; | |
1392 | } | |
1393 | if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) { | |
1394 | ||
1395 | if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE) | |
1396 | goto again; | |
1397 | ||
2546420a A |
1398 | if (wq->wq_thidlecount == 0 || busycount) { |
1399 | start_timer = WQ_TIMER_DELAYED_NEEDED(wq); | |
1400 | } | |
f1a1da6c | 1401 | |
2546420a | 1402 | PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0); |
f1a1da6c A |
1403 | } |
1404 | } | |
1405 | } | |
1406 | } | |
964d3577 | 1407 | |
2546420a | 1408 | /* |
964d3577 A |
1409 | * If we called WQ_TIMER_DELAYED_NEEDED above, then this flag will be set if that |
1410 | * call marked the timer running. If so, we let the timer interval grow. | |
1411 | * Otherwise, we reset it back to 0. | |
1412 | */ | |
2546420a | 1413 | if (!(wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING)) { |
f1a1da6c | 1414 | wq->wq_timer_interval = 0; |
2546420a | 1415 | } |
f1a1da6c A |
1416 | |
1417 | wq->wq_lflags &= ~WQL_ATIMER_BUSY; | |
1418 | ||
1419 | if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) { | |
1420 | /* | |
2546420a | 1421 | * wakeup the thread hung up in _workqueue_mark_exiting or workqueue_add_timer waiting for this timer |
f1a1da6c A |
1422 | * to finish getting out of the way |
1423 | */ | |
1424 | wq->wq_lflags &= ~WQL_ATIMER_WAITING; | |
1425 | wakeup(wq); | |
1426 | } | |
1427 | ||
2546420a | 1428 | PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0); |
f1a1da6c | 1429 | |
2546420a | 1430 | workqueue_unlock(wq); |
f1a1da6c | 1431 | |
964d3577 A |
1432 | if (start_timer == TRUE) |
1433 | workqueue_interval_timer_start(wq); | |
f1a1da6c A |
1434 | } |
1435 | ||
964d3577 | 1436 | #pragma mark thread state tracking |
f1a1da6c | 1437 | |
964d3577 | 1438 | // called by spinlock code when trying to yield to lock owner |
f1a1da6c A |
1439 | void |
1440 | _workqueue_thread_yielded(void) | |
1441 | { | |
1442 | struct workqueue *wq; | |
1443 | proc_t p; | |
1444 | ||
1445 | p = current_proc(); | |
1446 | ||
1447 | if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL || wq->wq_reqcount == 0) | |
1448 | return; | |
2546420a A |
1449 | |
1450 | workqueue_lock_spin(wq); | |
f1a1da6c A |
1451 | |
1452 | if (wq->wq_reqcount) { | |
1453 | uint64_t curtime; | |
1454 | uint64_t elapsed; | |
1455 | clock_sec_t secs; | |
1456 | clock_usec_t usecs; | |
1457 | ||
1458 | if (wq->wq_thread_yielded_count++ == 0) | |
1459 | wq->wq_thread_yielded_timestamp = mach_absolute_time(); | |
1460 | ||
1461 | if (wq->wq_thread_yielded_count < wq_yielded_threshold) { | |
2546420a | 1462 | workqueue_unlock(wq); |
f1a1da6c A |
1463 | return; |
1464 | } | |
1465 | ||
2546420a | 1466 | PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0); |
f1a1da6c A |
1467 | |
1468 | wq->wq_thread_yielded_count = 0; | |
1469 | ||
1470 | curtime = mach_absolute_time(); | |
1471 | elapsed = curtime - wq->wq_thread_yielded_timestamp; | |
1472 | pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs); | |
1473 | ||
1474 | if (secs == 0 && usecs < wq_yielded_window_usecs) { | |
1475 | ||
1476 | if (wq->wq_thidlecount == 0) { | |
1477 | workqueue_addnewthread(wq, TRUE); | |
1478 | /* | |
1479 | * 'workqueue_addnewthread' drops the workqueue lock | |
1480 | * when creating the new thread and then retakes it before | |
1481 | * returning... this window allows other threads to process | |
1482 | * requests, so we need to recheck for available work; |
1483 | * if none is found, we just return... the newly created thread |
1484 | * will eventually get used (if it hasn't already)... | |
1485 | */ | |
1486 | if (wq->wq_reqcount == 0) { | |
2546420a | 1487 | workqueue_unlock(wq); |
f1a1da6c A |
1488 | return; |
1489 | } | |
1490 | } | |
1491 | if (wq->wq_thidlecount) { | |
2546420a | 1492 | (void)workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_UNCONSTRAINED, 0, false); |
f1a1da6c A |
1493 | /* |
1494 | * workqueue_run_nextreq is responsible for | |
1495 | * dropping the workqueue lock in all cases | |
1496 | */ | |
2546420a | 1497 | PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0); |
f1a1da6c A |
1498 | |
1499 | return; | |
1500 | } | |
1501 | } | |
2546420a | 1502 | PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0); |
f1a1da6c | 1503 | } |
2546420a | 1504 | workqueue_unlock(wq); |
f1a1da6c A |
1505 | } |
1506 | ||
f1a1da6c A |
1507 | static void |
1508 | workqueue_callback(int type, thread_t thread) | |
1509 | { | |
1510 | struct uthread *uth; | |
1511 | struct threadlist *tl; | |
1512 | struct workqueue *wq; | |
1513 | ||
1514 | uth = pthread_kern->get_bsdthread_info(thread); | |
1515 | tl = pthread_kern->uthread_get_threadlist(uth); | |
1516 | wq = tl->th_workq; | |
1517 | ||
1518 | switch (type) { | |
1519 | case SCHED_CALL_BLOCK: { | |
1520 | uint32_t old_activecount; | |
1521 | boolean_t start_timer = FALSE; | |
1522 | ||
1523 | old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]); | |
1524 | ||
964d3577 A |
1525 | /* |
1526 | * If we blocked and were at the requested concurrency previously, we may | |
1527 | * need to spin up a new thread. Of course, if it's the event manager | |
1528 | * then that's moot, so ignore that case. | |
1529 | */ | |
1530 | if (old_activecount == wq->wq_reqconc[tl->th_priority] && | |
1531 | tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) { | |
f1a1da6c A |
1532 | uint64_t curtime; |
1533 | UInt64 *lastblocked_ptr; | |
1534 | ||
1535 | /* | |
1536 | * the number of active threads at this priority | |
1537 | * has fallen below the maximum number of concurrent | |
1538 | * threads that we're allowed to run | |
1539 | */ | |
1540 | lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority]; | |
1541 | curtime = mach_absolute_time(); | |
1542 | ||
1543 | /* | |
1544 | * if we collide with another thread trying to update the last_blocked (really unlikely | |
1545 | * since another thread would have to get scheduled and then block after we start down | |
1546 | * this path), it's not a problem. Either timestamp is adequate, so no need to retry | |
1547 | */ | |
1548 | ||
1549 | OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr); | |
1550 | ||
1551 | if (wq->wq_reqcount) { | |
1552 | /* | |
2546420a A |
1553 | * We have work to do so start up the timer if it's not |
1554 | * running; it'll sort out whether we need to start another | |
1555 | * thread | |
f1a1da6c | 1556 | */ |
2546420a | 1557 | start_timer = WQ_TIMER_DELAYED_NEEDED(wq); |
f1a1da6c A |
1558 | } |
1559 | ||
1560 | if (start_timer == TRUE) { | |
1561 | workqueue_interval_timer_start(wq); | |
1562 | } | |
1563 | } | |
2546420a | 1564 | PTHREAD_TRACE1_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq, old_activecount, tl->th_priority, start_timer, thread_tid(thread)); |
f1a1da6c A |
1565 | break; |
1566 | } | |
1567 | case SCHED_CALL_UNBLOCK: | |
1568 | /* | |
1569 | * we cannot take the workqueue_lock here... | |
1570 | * an UNBLOCK can occur from a timer event which | |
1571 | * is run from an interrupt context... if the workqueue_lock | |
1572 | * is already held by this processor, we'll deadlock... | |
1573 | * the thread lock for the thread being UNBLOCKED | |
1574 | * is also held | |
1575 | */ | |
1576 | OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority]); | |
2546420a A |
1577 | |
1578 | PTHREAD_TRACE1_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, 0, thread_tid(thread)); | |
1579 | ||
f1a1da6c A |
1580 | break; |
1581 | } | |
1582 | } | |
1583 | ||
1584 | sched_call_t | |
1585 | _workqueue_get_sched_callback(void) | |
1586 | { | |
1587 | return workqueue_callback; | |
1588 | } | |
1589 | ||
964d3577 A |
1590 | #pragma mark thread addition/removal |
1591 | ||
2546420a A |
1592 | static mach_vm_size_t |
1593 | _workqueue_allocsize(struct workqueue *wq) | |
1594 | { | |
1595 | proc_t p = wq->wq_proc; | |
1596 | mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map); | |
1597 | mach_vm_size_t pthread_size = | |
1598 | vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map)); | |
1599 | return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; | |
1600 | } | |
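/*
 * Rough picture of the allocation this size covers (lowest address first),
 * matching the mach_vm_map()/mach_vm_protect() calls in
 * workqueue_addnewthread() below:
 *
 *	[ guard page (VM_PROT_NONE) ][ PTH_DEFAULT_STACKSIZE stack ][ pthread_t area ]
 *
 * The stack grows down toward the guard page; the pthread_t area at the top
 * is why PTHREAD_T_OFFSET is added to proc_get_pthsize() above.
 */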
1601 | ||
964d3577 A |
1602 | /** |
1603 | * pop goes the thread | |
2546420a A |
1604 | * |
1605 | * If fromexit is set, the call is from workqueue_exit(), |
1606 | * so some cleanup steps are skipped. |
964d3577 | 1607 | */ |
f1a1da6c | 1608 | static void |
2546420a | 1609 | workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use) |
f1a1da6c | 1610 | { |
f1a1da6c | 1611 | struct uthread * uth; |
2546420a | 1612 | struct workqueue * wq = tl->th_workq; |
f1a1da6c | 1613 | |
2546420a A |
1614 | if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){ |
1615 | TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry); | |
1616 | } else { | |
1617 | TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); | |
1618 | } | |
f1a1da6c A |
1619 | |
1620 | if (fromexit == 0) { | |
2546420a | 1621 | assert(wq->wq_nthreads && wq->wq_thidlecount); |
f1a1da6c A |
1622 | wq->wq_nthreads--; |
1623 | wq->wq_thidlecount--; | |
1624 | } | |
1625 | ||
1626 | /* | |
2546420a | 1627 | * Clear the threadlist pointer in uthread so |
f1a1da6c A |
1628 | * a blocked thread woken up for termination will |
1629 | * not access the thread list, as it is about to be |
1630 | * freed. | |
1631 | */ | |
1632 | pthread_kern->thread_sched_call(tl->th_thread, NULL); | |
1633 | ||
1634 | uth = pthread_kern->get_bsdthread_info(tl->th_thread); | |
1635 | if (uth != (struct uthread *)0) { | |
1636 | pthread_kern->uthread_set_threadlist(uth, NULL); | |
1637 | } | |
1638 | if (fromexit == 0) { | |
1639 | /* during exit the lock is not held */ | |
2546420a | 1640 | workqueue_unlock(wq); |
f1a1da6c A |
1641 | } |
1642 | ||
2546420a | 1643 | if ( (tl->th_flags & TH_LIST_NEW) || first_use ) { |
f1a1da6c | 1644 | /* |
2546420a | 1645 | * thread was created, but never used... |
f1a1da6c A |
1646 | * need to clean up the stack and port ourselves |
1647 | * since we're not going to spin up through the | |
1648 | * normal exit path triggered from Libc | |
1649 | */ | |
1650 | if (fromexit == 0) { | |
1651 | /* vm map is already deallocated when this is called from exit */ | |
2546420a | 1652 | (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq)); |
f1a1da6c A |
1653 | } |
1654 | (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport); | |
1655 | ||
f1a1da6c A |
1656 | } else { |
1657 | ||
2546420a | 1658 | PTHREAD_TRACE1_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread)); |
f1a1da6c A |
1659 | } |
1660 | /* | |
1661 | * drop our ref on the thread | |
1662 | */ | |
1663 | thread_deallocate(tl->th_thread); | |
1664 | ||
1665 | kfree(tl, sizeof(struct threadlist)); | |
1666 | } | |
1667 | ||
1668 | ||
964d3577 A |
1669 | /** |
1670 | * Try to add a new workqueue thread. | |
1671 | * | |
1672 | * - called with workq lock held | |
1673 | * - dropped and retaken around thread creation | |
1674 | * - return with workq lock held | |
f1a1da6c A |
1675 | */ |
1676 | static boolean_t | |
964d3577 | 1677 | workqueue_addnewthread(struct workqueue *wq, boolean_t ignore_constrained_thread_limit) |
f1a1da6c A |
1678 | { |
1679 | struct threadlist *tl; | |
1680 | struct uthread *uth; | |
1681 | kern_return_t kret; | |
1682 | thread_t th; | |
1683 | proc_t p; | |
1684 | void *sright; | |
1685 | mach_vm_offset_t stackaddr; | |
f1a1da6c | 1686 | |
964d3577 | 1687 | if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING) { |
2546420a | 1688 | PTHREAD_TRACE_WQ(TRACE_wq_thread_add_during_exit | DBG_FUNC_NONE, wq, 0, 0, 0, 0); |
f1a1da6c | 1689 | return (FALSE); |
964d3577 | 1690 | } |
f1a1da6c | 1691 | |
2546420a A |
1692 | if (wq->wq_nthreads >= wq_max_threads) { |
1693 | PTHREAD_TRACE_WQ(TRACE_wq_thread_limit_exceeded | DBG_FUNC_NONE, wq, wq->wq_nthreads, wq_max_threads, 0, 0); | |
f1a1da6c A |
1694 | return (FALSE); |
1695 | } | |
f1a1da6c | 1696 | |
964d3577 A |
1697 | if (ignore_constrained_thread_limit == FALSE && |
1698 | wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { | |
2546420a | 1699 | /* |
964d3577 A |
1700 | * If we're not creating this thread to service an overcommit or |
1701 | * event manager request, then we check to see if we are over our | |
1702 | * constrained thread limit, in which case we error out. | |
f1a1da6c | 1703 | */ |
2546420a | 1704 | PTHREAD_TRACE_WQ(TRACE_wq_thread_constrained_maxed | DBG_FUNC_NONE, wq, wq->wq_constrained_threads_scheduled, |
964d3577 | 1705 | wq_max_constrained_threads, 0, 0); |
f1a1da6c A |
1706 | return (FALSE); |
1707 | } | |
f1a1da6c A |
1708 | |
1709 | wq->wq_nthreads++; | |
1710 | ||
1711 | p = wq->wq_proc; | |
2546420a A |
1712 | workqueue_unlock(wq); |
1713 | ||
1714 | tl = kalloc(sizeof(struct threadlist)); | |
1715 | bzero(tl, sizeof(struct threadlist)); | |
f1a1da6c | 1716 | |
2546420a | 1717 | kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th); |
f1a1da6c | 1718 | if (kret != KERN_SUCCESS) { |
2546420a A |
1719 | PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0); |
1720 | kfree(tl, sizeof(struct threadlist)); | |
f1a1da6c A |
1721 | goto failed; |
1722 | } | |
1723 | ||
2546420a | 1724 | stackaddr = pthread_kern->proc_get_stack_addr_hint(p); |
964d3577 A |
1725 | |
1726 | mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map); | |
2546420a | 1727 | mach_vm_size_t pthread_size = |
964d3577 | 1728 | vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map)); |
2546420a | 1729 | mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; |
f1a1da6c A |
1730 | |
1731 | kret = mach_vm_map(wq->wq_map, &stackaddr, | |
2546420a A |
1732 | th_allocsize, page_size-1, |
1733 | VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL, | |
1734 | 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, | |
1735 | VM_INHERIT_DEFAULT); | |
f1a1da6c A |
1736 | |
1737 | if (kret != KERN_SUCCESS) { | |
2546420a | 1738 | PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0); |
964d3577 A |
1739 | |
1740 | kret = mach_vm_allocate(wq->wq_map, | |
2546420a | 1741 | &stackaddr, th_allocsize, |
964d3577 | 1742 | VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); |
f1a1da6c A |
1743 | } |
1744 | if (kret == KERN_SUCCESS) { | |
964d3577 | 1745 | /* |
f1a1da6c A |
1746 | * The guard page is at the lowest address |
1747 | * The stack base is the highest address | |
1748 | */ | |
964d3577 | 1749 | kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE); |
f1a1da6c | 1750 | |
964d3577 | 1751 | if (kret != KERN_SUCCESS) { |
2546420a A |
1752 | (void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize); |
1753 | PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0); | |
964d3577 | 1754 | } |
f1a1da6c A |
1755 | } |
1756 | if (kret != KERN_SUCCESS) { | |
1757 | (void) thread_terminate(th); | |
1758 | thread_deallocate(th); | |
1759 | ||
1760 | kfree(tl, sizeof(struct threadlist)); | |
1761 | goto failed; | |
1762 | } | |
1763 | thread_reference(th); | |
1764 | ||
2546420a A |
1765 | pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE); |
1766 | ||
f1a1da6c A |
1767 | sright = (void *)pthread_kern->convert_thread_to_port(th); |
1768 | tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(wq->wq_task)); | |
1769 | ||
1770 | pthread_kern->thread_static_param(th, TRUE); | |
1771 | ||
2546420a | 1772 | tl->th_flags = TH_LIST_INITED | TH_LIST_NEW; |
f1a1da6c A |
1773 | |
1774 | tl->th_thread = th; | |
1775 | tl->th_workq = wq; | |
1776 | tl->th_stackaddr = stackaddr; | |
1777 | tl->th_priority = WORKQUEUE_NUM_BUCKETS; | |
f1a1da6c A |
1778 | |
1779 | uth = pthread_kern->get_bsdthread_info(tl->th_thread); | |
1780 | ||
2546420a | 1781 | workqueue_lock_spin(wq); |
f1a1da6c A |
1782 | |
1783 | pthread_kern->uthread_set_threadlist(uth, tl); | |
1784 | TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry); | |
1785 | ||
1786 | wq->wq_thidlecount++; | |
1787 | ||
2546420a | 1788 | PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0); |
f1a1da6c A |
1789 | |
1790 | return (TRUE); | |
1791 | ||
1792 | failed: | |
2546420a | 1793 | workqueue_lock_spin(wq); |
f1a1da6c A |
1794 | wq->wq_nthreads--; |
1795 | ||
1796 | return (FALSE); | |
1797 | } | |
1798 | ||
964d3577 A |
1799 | /** |
1800 | * Setup per-process state for the workqueue. | |
1801 | */ | |
f1a1da6c A |
1802 | int |
1803 | _workq_open(struct proc *p, __unused int32_t *retval) | |
1804 | { | |
1805 | struct workqueue * wq; | |
1806 | int wq_size; | |
1807 | char * ptr; | |
1808 | uint32_t i; | |
1809 | uint32_t num_cpus; | |
1810 | int error = 0; | |
f1a1da6c A |
1811 | |
1812 | if (pthread_kern->proc_get_register(p) == 0) { | |
1813 | return EINVAL; | |
1814 | } | |
1815 | ||
1816 | num_cpus = pthread_kern->ml_get_max_cpus(); | |
1817 | ||
1818 | if (wq_init_constrained_limit) { | |
1819 | uint32_t limit; | |
1820 | /* | |
1821 | * set up the limit for the constrained pool | |
1822 | * this is a virtual pool in that we don't | |
1823 | * maintain it on a separate idle and run list | |
1824 | */ | |
1825 | limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR; | |
1826 | ||
1827 | if (limit > wq_max_constrained_threads) | |
1828 | wq_max_constrained_threads = limit; | |
1829 | ||
1830 | wq_init_constrained_limit = 0; | |
2546420a A |
1831 | |
1832 | if (wq_max_threads > pthread_kern->config_thread_max - 20) { | |
1833 | wq_max_threads = pthread_kern->config_thread_max - 20; | |
1834 | } | |
f1a1da6c | 1835 | } |
f1a1da6c A |
1836 | |
1837 | if (pthread_kern->proc_get_wqptr(p) == NULL) { | |
2546420a A |
1838 | if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) { |
1839 | assert(pthread_kern->proc_get_wqptr(p) != NULL); | |
f1a1da6c A |
1840 | goto out; |
1841 | } | |
1842 | ||
f1a1da6c A |
1843 | wq_size = sizeof(struct workqueue); |
1844 | ||
1845 | ptr = (char *)kalloc(wq_size); | |
1846 | bzero(ptr, wq_size); | |
1847 | ||
1848 | wq = (struct workqueue *)ptr; | |
1849 | wq->wq_flags = WQ_LIST_INITED; | |
1850 | wq->wq_proc = p; | |
964d3577 | 1851 | wq->wq_max_concurrency = wq_max_concurrency; |
f1a1da6c A |
1852 | wq->wq_task = current_task(); |
1853 | wq->wq_map = pthread_kern->current_map(); | |
1854 | ||
1855 | for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) | |
1856 | wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency; | |
1857 | ||
964d3577 A |
1858 | // The event manager bucket is special, so it gets a concurrency of 1 |
1859 | // though we shouldn't ever read this value for that bucket | |
1860 | wq->wq_reqconc[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; | |
1861 | ||
2546420a A |
1862 | // Start the event manager at the priority hinted at by the policy engine |
1863 | int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task()); | |
1864 | wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; | |
964d3577 | 1865 | |
f1a1da6c A |
1866 | TAILQ_INIT(&wq->wq_thrunlist); |
1867 | TAILQ_INIT(&wq->wq_thidlelist); | |
1868 | ||
2546420a A |
1869 | wq->wq_atimer_delayed_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq); |
1870 | wq->wq_atimer_immediate_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq); | |
f1a1da6c | 1871 | |
2546420a | 1872 | lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr); |
f1a1da6c A |
1873 | |
1874 | pthread_kern->proc_set_wqptr(p, wq); | |
f1a1da6c | 1875 | |
f1a1da6c A |
1876 | } |
1877 | out: | |
f1a1da6c | 1878 | |
f1a1da6c A |
1879 | return(error); |
1880 | } | |
1881 | ||
f1a1da6c A |
1882 | /* |
1883 | * Routine: workqueue_mark_exiting | |
1884 | * | |
1885 | * Function: Mark the work queue such that new threads will not be added to the | |
964d3577 | 1886 | * work queue after we return. |
f1a1da6c A |
1887 | * |
1888 | * Conditions: Called against the current process. | |
1889 | */ | |
1890 | void | |
1891 | _workqueue_mark_exiting(struct proc *p) | |
1892 | { | |
1893 | struct workqueue *wq = pthread_kern->proc_get_wqptr(p); | |
1894 | ||
1895 | if (wq != NULL) { | |
1896 | ||
2546420a | 1897 | PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0); |
f1a1da6c | 1898 | |
2546420a | 1899 | workqueue_lock_spin(wq); |
f1a1da6c A |
1900 | |
1901 | /* | |
2546420a A |
1902 | * We arm the add timer without holding the workqueue lock so we need |
1903 | * to synchronize with any running or soon to be running timers. | |
f1a1da6c | 1904 | * |
2546420a A |
1905 | * Threads that intend to arm the timer atomically OR |
1906 | * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if | |
1907 | * WQ_EXITING is not present. So, once we have set WQ_EXITING, we can | |
1908 | * be sure that no new RUNNING flags will be set, but still need to | |
1909 | * wait for the already running timers to complete. | |
f1a1da6c | 1910 | * |
2546420a A |
1911 | * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so |
1912 | * the check for and sleep until clear is protected. | |
f1a1da6c | 1913 | */ |
2546420a | 1914 | WQ_SETFLAG(wq, WQ_EXITING); |
f1a1da6c | 1915 | |
2546420a A |
1916 | if (wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING) { |
1917 | if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) { | |
1918 | WQ_UNSETFLAG(wq, WQ_ATIMER_DELAYED_RUNNING); | |
1919 | } | |
1920 | } | |
1921 | if (wq->wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) { | |
1922 | if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) { | |
1923 | WQ_UNSETFLAG(wq, WQ_ATIMER_IMMEDIATE_RUNNING); | |
f1a1da6c A |
1924 | } |
1925 | } | |
2546420a A |
1926 | while (wq->wq_flags & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING) || |
1927 | (wq->wq_lflags & WQL_ATIMER_BUSY)) { | |
f1a1da6c | 1928 | assert_wait((caddr_t)wq, (THREAD_UNINT)); |
2546420a | 1929 | workqueue_unlock(wq); |
f1a1da6c A |
1930 | |
1931 | thread_block(THREAD_CONTINUE_NULL); | |
1932 | ||
2546420a | 1933 | workqueue_lock_spin(wq); |
f1a1da6c | 1934 | } |
2546420a | 1935 | workqueue_unlock(wq); |
f1a1da6c A |
1936 | |
1937 | PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0); | |
1938 | } | |
1939 | } | |
1940 | ||
1941 | /* | |
1942 | * Routine: workqueue_exit | |
1943 | * | |
1944 | * Function: clean up the work queue structure(s) now that there are no threads | |
1945 | * left running inside the work queue (except possibly current_thread). | |
1946 | * | |
1947 | * Conditions: Called by the last thread in the process. | |
1948 | * Called against current process. | |
1949 | */ | |
1950 | void | |
1951 | _workqueue_exit(struct proc *p) | |
1952 | { | |
1953 | struct workqueue * wq; | |
1954 | struct threadlist * tl, *tlist; | |
1955 | struct uthread *uth; | |
2546420a | 1956 | size_t wq_size = sizeof(struct workqueue); |
f1a1da6c A |
1957 | |
1958 | wq = pthread_kern->proc_get_wqptr(p); | |
1959 | if (wq != NULL) { | |
1960 | ||
2546420a | 1961 | PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0); |
f1a1da6c | 1962 | |
f1a1da6c | 1963 | pthread_kern->proc_set_wqptr(p, NULL); |
f1a1da6c A |
1964 | |
1965 | /* | |
1966 | * Clean up workqueue data structures for threads that exited and | |
1967 | * didn't get a chance to clean up after themselves. | |
1968 | */ | |
1969 | TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) { | |
2546420a A |
1970 | assert((tl->th_flags & TH_LIST_RUNNING) != 0); |
1971 | ||
f1a1da6c A |
1972 | pthread_kern->thread_sched_call(tl->th_thread, NULL); |
1973 | ||
1974 | uth = pthread_kern->get_bsdthread_info(tl->th_thread); | |
1975 | if (uth != (struct uthread *)0) { | |
1976 | pthread_kern->uthread_set_threadlist(uth, NULL); | |
1977 | } | |
1978 | TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); | |
1979 | ||
1980 | /* | |
1981 | * drop our last ref on the thread | |
1982 | */ | |
1983 | thread_deallocate(tl->th_thread); | |
1984 | ||
1985 | kfree(tl, sizeof(struct threadlist)); | |
1986 | } | |
1987 | TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) { | |
2546420a A |
1988 | assert((tl->th_flags & TH_LIST_RUNNING) == 0); |
1989 | assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET); | |
1990 | workqueue_removethread(tl, true, false); | |
1991 | } | |
1992 | TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) { | |
1993 | assert((tl->th_flags & TH_LIST_RUNNING) == 0); | |
1994 | assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET); | |
1995 | workqueue_removethread(tl, true, false); | |
f1a1da6c | 1996 | } |
2546420a A |
1997 | thread_call_free(wq->wq_atimer_delayed_call); |
1998 | thread_call_free(wq->wq_atimer_immediate_call); | |
1999 | lck_spin_destroy(&wq->wq_lock, pthread_lck_grp); | |
f1a1da6c A |
2000 | |
2001 | kfree(wq, wq_size); | |
2002 | ||
2003 | PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0); | |
2004 | } | |
2005 | } | |
2006 | ||
2007 | ||
964d3577 | 2008 | #pragma mark workqueue thread manipulation |
f1a1da6c | 2009 | |
964d3577 A |
2010 | /** |
2011 | * Entry point for libdispatch to ask for threads | |
2012 | */ | |
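/*
 * Reached from user space through _workq_kernreturn() below with
 * options == WQOPS_QUEUE_REQTHREADS, arg2 = number of threads to start and
 * arg3 = priority; roughly (illustrative sketch of the libpthread side):
 *
 *	__workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, reqcount, priority);
 */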
2013 | static int wqops_queue_reqthreads(struct proc *p, int reqcount, pthread_priority_t priority){ | |
2014 | struct workqueue *wq; | |
2546420a | 2015 | boolean_t start_timer = FALSE; |
f1a1da6c | 2016 | |
964d3577 A |
2017 | boolean_t overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0; |
2018 | int class = pthread_priority_get_class_index(priority); | |
f1a1da6c | 2019 | |
964d3577 A |
2020 | boolean_t event_manager = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) != 0; |
2021 | if (event_manager){ | |
2022 | class = WORKQUEUE_EVENT_MANAGER_BUCKET; | |
2023 | } | |
f1a1da6c | 2024 | |
964d3577 A |
2025 | if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) || (overcommit && event_manager)) { |
2026 | return EINVAL; | |
2027 | } | |
2028 | ||
2546420a | 2029 | |
964d3577 | 2030 | if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { |
964d3577 A |
2031 | return EINVAL; |
2032 | } | |
2546420a A |
2033 | |
2034 | workqueue_lock_spin(wq); | |
964d3577 A |
2035 | |
2036 | if (overcommit == 0 && event_manager == 0) { | |
2037 | wq->wq_reqcount += reqcount; | |
2038 | wq->wq_requests[class] += reqcount; | |
2039 | ||
2546420a | 2040 | PTHREAD_TRACE_WQ(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0); |
964d3577 A |
2041 | |
2042 | while (wq->wq_reqcount) { | |
2043 | if (!workqueue_run_one(p, wq, overcommit, 0)) | |
2044 | break; | |
2045 | } | |
2546420a A |
2046 | } else if (overcommit) { |
2047 | PTHREAD_TRACE_WQ(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_ocrequests[class], reqcount, 0); | |
964d3577 A |
2048 | |
2049 | while (reqcount) { | |
2050 | if (!workqueue_run_one(p, wq, overcommit, priority)) | |
2051 | break; | |
2052 | reqcount--; | |
2053 | } | |
2054 | if (reqcount) { | |
2055 | /* | |
2546420a A |
2056 | * We need to delay starting some of the overcommit requests. |
2057 | * We'll record the request here and as existing threads return to | |
2058 | * the kernel, we'll notice the ocrequests and spin them back to | |
2059 | * user space as the overcommit variety. | |
964d3577 A |
2060 | */ |
2061 | wq->wq_reqcount += reqcount; | |
2062 | wq->wq_requests[class] += reqcount; | |
2063 | wq->wq_ocrequests[class] += reqcount; | |
2064 | ||
2546420a | 2065 | PTHREAD_TRACE_WQ(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_ocrequests[class], reqcount, 0); |
964d3577 | 2066 | |
2546420a A |
2067 | /* |
2068 | * If we delayed this thread coming up but we're not constrained | |
964d3577 A |
2069 | * or at max threads then we need to start the timer so we don't |
2070 | * risk dropping this request on the floor. | |
2071 | */ | |
2546420a A |
2072 | if ((wq->wq_constrained_threads_scheduled < wq_max_constrained_threads) && |
2073 | (wq->wq_nthreads < wq_max_threads)){ | |
2074 | start_timer = WQ_TIMER_DELAYED_NEEDED(wq); | |
964d3577 A |
2075 | } |
2076 | } | |
2077 | } else if (event_manager) { | |
2546420a | 2078 | PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, wq->wq_event_manager_priority, wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET], 0); |
964d3577 A |
2079 | |
2080 | if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){ | |
2081 | wq->wq_reqcount += 1; | |
2082 | wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; | |
2083 | } | |
2084 | ||
2085 | // We've recorded the request for an event manager thread above. We'll | |
2086 | // let the timer pick it up as we would for a kernel callout. We can | |
2087 | // do a direct add/wakeup when that support is added for the kevent path. | |
2546420a A |
2088 | if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){ |
2089 | start_timer = WQ_TIMER_DELAYED_NEEDED(wq); | |
2090 | } | |
964d3577 | 2091 | } |
2546420a A |
2092 | |
2093 | if (start_timer) { | |
2094 | workqueue_interval_timer_start(wq); | |
2095 | } | |
2096 | ||
2097 | workqueue_unlock(wq); | |
964d3577 A |
2098 | |
2099 | return 0; | |
2100 | } | |
2101 | ||
2546420a A |
2102 | /* |
2103 | * Used by the kevent system to request threads. | |
2104 | * | |
2105 | * Currently count is ignored and we always return one thread per invocation. | |
964d3577 A |
2106 | */ |
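/*
 * Return convention (see the end of the function): THREAD_NULL means no
 * thread was handed back directly (any recorded request is instead picked
 * up via the immediate thread call), while the (void *)-1 sentinel means
 * the request was punted to the event manager thread.
 */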
2107 | thread_t _workq_reqthreads(struct proc *p, int requests_count, workq_reqthreads_req_t requests){ | |
2546420a A |
2108 | thread_t th = THREAD_NULL; |
2109 | boolean_t do_thread_call = FALSE; | |
2110 | boolean_t emergency_thread = FALSE; | |
964d3577 A |
2111 | assert(requests_count > 0); |
2112 | ||
2113 | #if DEBUG | |
2114 | // Make sure that the requests array is sorted, highest priority first | |
2115 | if (requests_count > 1){ | |
2116 | __assert_only qos_class_t priority = _pthread_priority_get_qos_newest(requests[0].priority); | |
2117 | __assert_only unsigned long flags = ((_pthread_priority_get_flags(requests[0].priority) & (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) != 0); | |
2118 | for (int i = 1; i < requests_count; i++){ | |
2119 | if (requests[i].count == 0) continue; | |
2120 | __assert_only qos_class_t next_priority = _pthread_priority_get_qos_newest(requests[i].priority); | |
2121 | __assert_only unsigned long next_flags = ((_pthread_priority_get_flags(requests[i].priority) & (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) != 0); | |
2122 | if (next_flags != flags){ | |
2123 | flags = next_flags; | |
2124 | priority = next_priority; | |
2125 | } else { | |
2126 | assert(next_priority <= priority); | |
2127 | } | |
2128 | } | |
2129 | } | |
2130 | #endif // DEBUG | |
f1a1da6c | 2131 | |
964d3577 | 2132 | struct workqueue *wq; |
964d3577 | 2133 | if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { |
2546420a | 2134 | return THREAD_NULL; |
964d3577 A |
2135 | } |
2136 | ||
2546420a A |
2137 | workqueue_lock_spin(wq); |
2138 | ||
2139 | PTHREAD_TRACE_WQ(TRACE_wq_kevent_req_threads | DBG_FUNC_START, wq, requests_count, 0, 0, 0); | |
964d3577 A |
2140 | |
2141 | // Look for overcommit or event-manager-only requests. | |
2142 | boolean_t have_overcommit = FALSE; | |
2143 | pthread_priority_t priority = 0; | |
2144 | for (int i = 0; i < requests_count; i++){ | |
2145 | if (requests[i].count == 0) | |
2146 | continue; | |
2147 | priority = requests[i].priority; | |
2148 | if (_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED){ | |
2149 | priority |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; | |
2150 | } | |
2151 | if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) != 0){ | |
2152 | goto event_manager; | |
2153 | } | |
2154 | if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){ | |
2155 | have_overcommit = TRUE; | |
2156 | break; | |
f1a1da6c A |
2157 | } |
2158 | } | |
2159 | ||
964d3577 | 2160 | if (have_overcommit){ |
2546420a A |
2161 | if (wq->wq_thidlecount){ |
2162 | th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_OVERCOMMIT_KEVENT, priority, true); | |
2163 | if (th != THREAD_NULL){ | |
2164 | goto out; | |
2165 | } else { | |
2166 | workqueue_lock_spin(wq); // reacquire lock | |
2167 | } | |
964d3577 | 2168 | } |
2546420a A |
2169 | |
2170 | int class = pthread_priority_get_class_index(priority); | |
2171 | wq->wq_reqcount += 1; | |
2172 | wq->wq_requests[class] += 1; | |
2173 | wq->wq_kevent_ocrequests[class] += 1; | |
2174 | ||
2175 | do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq); | |
2176 | goto deferred; | |
964d3577 A |
2177 | } |
2178 | ||
2179 | // Having no overcommit requests, try to find any request that can start | |
2180 | // There's no TOCTTOU since we hold the workqueue lock | |
2181 | for (int i = 0; i < requests_count; i++){ | |
2182 | workq_reqthreads_req_t req = requests + i; | |
2183 | priority = req->priority; | |
2546420a | 2184 | int class = pthread_priority_get_class_index(priority); |
964d3577 A |
2185 | |
2186 | if (req->count == 0) | |
2187 | continue; | |
2188 | ||
2546420a A |
2189 | if (!may_start_constrained_thread(wq, class, WORKQUEUE_NUM_BUCKETS, NULL)) |
2190 | continue; | |
964d3577 | 2191 | |
2546420a A |
2192 | wq->wq_reqcount += 1; |
2193 | wq->wq_requests[class] += 1; | |
2194 | wq->wq_kevent_requests[class] += 1; | |
964d3577 | 2195 | |
2546420a | 2196 | PTHREAD_TRACE_WQ(TRACE_wq_req_kevent_threads | DBG_FUNC_NONE, wq, priority, wq->wq_kevent_requests[class], 1, 0); |
964d3577 | 2197 | |
2546420a A |
2198 | if (wq->wq_thidlecount){ |
2199 | th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_DEFAULT_KEVENT, priority, true); | |
2200 | goto out; | |
2201 | } else { | |
2202 | do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq); | |
2203 | goto deferred; | |
964d3577 A |
2204 | } |
2205 | } | |
2206 | ||
2207 | // Okay, here's the fun case: we can't spin up any of the non-overcommit threads | |
2208 | // that we've seen a request for, so we kick this over to the event manager thread | |
2546420a | 2209 | emergency_thread = TRUE; |
964d3577 A |
2210 | |
2211 | event_manager: | |
964d3577 A |
2212 | if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){ |
2213 | wq->wq_reqcount += 1; | |
2214 | wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; | |
2546420a A |
2215 | PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, 0, wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], 1, 0); |
2216 | } else { | |
2217 | PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, 0, wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], 0, 0); | |
964d3577 A |
2218 | } |
2219 | wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; | |
2220 | ||
2546420a A |
2221 | if (wq->wq_thidlecount && wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){ |
2222 | th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_EVENT_MANAGER, 0, true); | |
2223 | assert(th != THREAD_NULL); | |
2224 | goto out; | |
2225 | } | |
2226 | do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq); | |
964d3577 | 2227 | |
2546420a A |
2228 | deferred: |
2229 | workqueue_unlock(wq); | |
964d3577 | 2230 | |
2546420a A |
2231 | if (do_thread_call == TRUE){ |
2232 | workqueue_interval_timer_trigger(wq); | |
2233 | } | |
964d3577 | 2234 | |
2546420a A |
2235 | out: |
2236 | PTHREAD_TRACE_WQ(TRACE_wq_kevent_req_threads | DBG_FUNC_END, wq, do_thread_call, 0, 0, 0); | |
964d3577 | 2237 | |
2546420a | 2238 | return emergency_thread ? (void*)-1 : th; |
964d3577 A |
2239 | } |
2240 | ||
2241 | ||
2242 | static int wqops_thread_return(struct proc *p){ | |
2243 | thread_t th = current_thread(); | |
2244 | struct uthread *uth = pthread_kern->get_bsdthread_info(th); | |
2546420a A |
2245 | struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); |
2246 | ||
964d3577 A |
2247 | /* reset signal mask on the workqueue thread to default state */ |
2248 | if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) { | |
2249 | pthread_kern->proc_lock(p); | |
2250 | pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask); | |
2251 | pthread_kern->proc_unlock(p); | |
2252 | } | |
964d3577 A |
2253 | |
2254 | struct workqueue *wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); | |
2255 | if (wq == NULL || !tl) { | |
964d3577 A |
2256 | return EINVAL; |
2257 | } | |
964d3577 | 2258 | |
2546420a A |
2259 | PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0); |
2260 | ||
2261 | /* | |
2262 | * This squash call has neat semantics: it removes the specified overrides, | |
2263 | * replacing the current requested QoS with the previous effective QoS from | |
2264 | * those overrides. This means we won't be preempted due to having our QoS | |
2265 | * lowered. Of course, now our understanding of the thread's QoS is wrong, | |
2266 | * so we'll adjust below. | |
2267 | */ | |
2268 | int new_qos = | |
2269 | pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th, | |
2270 | THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD, | |
2271 | THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE); | |
2272 | ||
2273 | workqueue_lock_spin(wq); | |
2274 | ||
2275 | if (tl->th_flags & TH_LIST_KEVENT_BOUND) { | |
2276 | unsigned int flags = KEVENT_FLAG_WORKQ; | |
2277 | if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { | |
2278 | flags |= KEVENT_FLAG_WORKQ_MANAGER; | |
2279 | } | |
2280 | ||
2281 | workqueue_unlock(wq); | |
2282 | kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags); | |
2283 | workqueue_lock_spin(wq); | |
2284 | ||
2285 | tl->th_flags &= ~TH_LIST_KEVENT_BOUND; | |
2286 | } | |
2287 | ||
2288 | /* Fix up counters from the squash operation. */ | |
2289 | uint8_t old_bucket = tl->th_priority; | |
2290 | uint8_t new_bucket = thread_qos_get_class_index(new_qos); | |
2291 | ||
2292 | if (old_bucket != new_bucket) { | |
2293 | OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]); | |
2294 | OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]); | |
2295 | ||
2296 | wq->wq_thscheduled_count[old_bucket]--; | |
2297 | wq->wq_thscheduled_count[new_bucket]++; | |
2298 | ||
2299 | tl->th_priority = new_bucket; | |
2300 | } | |
2301 | ||
2302 | PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0); | |
2303 | ||
2304 | PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0); | |
2305 | ||
2306 | (void)workqueue_run_nextreq(p, wq, th, RUN_NEXTREQ_DEFAULT, 0, false); | |
f1a1da6c | 2307 | /* |
964d3577 A |
2308 | * workqueue_run_nextreq is responsible for |
2309 | * dropping the workqueue lock in all cases | |
f1a1da6c | 2310 | */ |
964d3577 A |
2311 | return 0; |
2312 | } | |
f1a1da6c | 2313 | |
964d3577 A |
2314 | /** |
2315 | * Multiplexed call to interact with the workqueue mechanism | |
2316 | */ | |
2317 | int | |
2318 | _workq_kernreturn(struct proc *p, | |
2319 | int options, | |
2546420a | 2320 | user_addr_t item, |
964d3577 A |
2321 | int arg2, |
2322 | int arg3, | |
2323 | int32_t *retval) | |
2324 | { | |
2325 | int error = 0; | |
2326 | ||
2327 | if (pthread_kern->proc_get_register(p) == 0) { | |
2328 | return EINVAL; | |
2329 | } | |
f1a1da6c | 2330 | |
964d3577 A |
2331 | switch (options) { |
2332 | case WQOPS_QUEUE_NEWSPISUPP: { | |
2333 | /* | |
2334 | * arg2 = offset of serialno into dispatch queue | |
2335 | * arg3 = kevent support | |
2336 | */ | |
2337 | int offset = arg2; | |
2338 | if (arg3 & 0x01){ | |
2339 | // If we get here, then userspace has indicated support for kevent delivery. | |
f1a1da6c | 2340 | } |
964d3577 A |
2341 | |
2342 | pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset); | |
2343 | break; | |
f1a1da6c | 2344 | } |
964d3577 A |
2345 | case WQOPS_QUEUE_REQTHREADS: { |
2346 | /* | |
2347 | * arg2 = number of threads to start | |
2348 | * arg3 = priority | |
2349 | */ | |
2350 | error = wqops_queue_reqthreads(p, arg2, arg3); | |
2351 | break; | |
2352 | } | |
2353 | case WQOPS_SET_EVENT_MANAGER_PRIORITY: { | |
2354 | /* | |
2355 | * arg2 = priority for the manager thread | |
2356 | * | |
2357 | * if _PTHREAD_PRIORITY_SCHED_PRI_FLAG is set, the |
2358 | * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead | |
2359 | * of a QOS value | |
2360 | */ | |
2361 | pthread_priority_t pri = arg2; | |
2362 | ||
964d3577 | 2363 | struct workqueue *wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); |
2546420a | 2364 | if (wq == NULL) { |
964d3577 A |
2365 | error = EINVAL; |
2366 | break; | |
2367 | } | |
2546420a | 2368 | workqueue_lock_spin(wq); |
964d3577 A |
2369 | if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ |
2370 | // If userspace passes a scheduling priority, that takes precedence |
2371 | // over any QoS. (So, userspace should take care not to accidentally |
2372 | // lower the priority this way.) | |
2373 | uint32_t sched_pri = pri & (~_PTHREAD_PRIORITY_FLAGS_MASK); | |
2374 | if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ | |
2375 | wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & (~_PTHREAD_PRIORITY_FLAGS_MASK)) | |
2376 | | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; | |
2377 | } else { | |
2378 | wq->wq_event_manager_priority = sched_pri | |
2379 | | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; | |
2380 | } | |
2381 | } else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){ | |
2546420a A |
2382 | int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority); |
2383 | int new_qos = pthread_priority_get_thread_qos(pri); | |
2384 | wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; | |
964d3577 | 2385 | } |
2546420a | 2386 | workqueue_unlock(wq); |
964d3577 A |
2387 | break; |
2388 | } | |
2546420a A |
2389 | case WQOPS_THREAD_KEVENT_RETURN: |
2390 | if (item != 0) { | |
2391 | int32_t kevent_retval; | |
2392 | int ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL, KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS, &kevent_retval); | |
2393 | // We shouldn't be getting more errors out than events we put in, so | |
2394 | // reusing the input buffer should always provide enough space. But, | |
2395 | // the assert is commented out since we get errors in edge cases in the | |
2396 | // process lifecycle. | |
2397 | //assert(ret == KERN_SUCCESS && kevent_retval >= 0); | |
2398 | if (ret != KERN_SUCCESS){ | |
2399 | error = ret; | |
2400 | break; | |
2401 | } else if (kevent_retval > 0){ | |
2402 | assert(kevent_retval <= arg2); | |
2403 | *retval = kevent_retval; | |
2404 | error = 0; | |
2405 | break; | |
2406 | } | |
f1a1da6c | 2407 | } |
2546420a A |
2408 | // FALLTHRU |
2409 | case WQOPS_THREAD_RETURN: | |
964d3577 A |
2410 | error = wqops_thread_return(p); |
2411 | // NOT REACHED except in case of error | |
2412 | assert(error); | |
2413 | break; | |
964d3577 A |
2414 | default: |
2415 | error = EINVAL; | |
2416 | break; | |
2417 | } | |
2418 | return (error); | |
2419 | } | |
f1a1da6c | 2420 | |
f1a1da6c | 2421 | |
964d3577 A |
2422 | static boolean_t |
2423 | workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority) | |
2424 | { | |
2425 | boolean_t ran_one; | |
f1a1da6c | 2426 | |
964d3577 A |
2427 | if (wq->wq_thidlecount == 0) { |
2428 | if (overcommit == FALSE) { | |
2429 | if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency) | |
2430 | workqueue_addnewthread(wq, overcommit); | |
2431 | } else { | |
2432 | workqueue_addnewthread(wq, overcommit); | |
2433 | ||
2434 | if (wq->wq_thidlecount == 0) | |
2435 | return (FALSE); | |
f1a1da6c A |
2436 | } |
2437 | } | |
2546420a | 2438 | ran_one = (workqueue_run_nextreq(p, wq, THREAD_NULL, overcommit ? RUN_NEXTREQ_OVERCOMMIT : RUN_NEXTREQ_DEFAULT, priority, false) != THREAD_NULL); |
f1a1da6c | 2439 | /* |
964d3577 A |
2440 | * workqueue_run_nextreq is responsible for |
2441 | * dropping the workqueue lock in all cases | |
f1a1da6c | 2442 | */ |
2546420a | 2443 | workqueue_lock_spin(wq); |
964d3577 A |
2444 | |
2445 | return (ran_one); | |
2446 | } | |
2447 | ||
2448 | /* | |
2546420a A |
2449 | * We have no work to do, so park ourselves on the idle list. |
2450 | * | |
2451 | * Consumes the workqueue lock and does not return. | |
964d3577 | 2452 | */ |
2546420a | 2453 | static void __dead2 |
964d3577 A |
2454 | parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread) |
2455 | { | |
2546420a A |
2456 | assert(thread == tl->th_thread); |
2457 | assert(thread == current_thread()); | |
2458 | ||
2459 | uint32_t us_to_wait = 0; | |
2460 | ||
964d3577 | 2461 | TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); |
964d3577 | 2462 | |
2546420a A |
2463 | tl->th_flags &= ~TH_LIST_RUNNING; |
2464 | tl->th_flags &= ~TH_LIST_KEVENT; | |
2465 | assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0); | |
964d3577 | 2466 | |
2546420a A |
2467 | if (tl->th_flags & TH_LIST_CONSTRAINED) { |
2468 | wq->wq_constrained_threads_scheduled--; | |
2469 | tl->th_flags &= ~TH_LIST_CONSTRAINED; | |
2470 | } | |
964d3577 A |
2471 | |
2472 | OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]); | |
2473 | wq->wq_thscheduled_count[tl->th_priority]--; | |
2474 | wq->wq_threads_scheduled--; | |
2546420a | 2475 | uint32_t thidlecount = ++wq->wq_thidlecount; |
964d3577 | 2476 | |
2546420a A |
2477 | pthread_kern->thread_sched_call(thread, NULL); |
2478 | ||
2479 | /* | |
2480 | * We'd like to always have one manager thread parked so that we can have | |
2481 | * low latency when we need to bring a manager thread up. If that idle | |
2482 | * thread list is empty, make this thread a manager thread. | |
2483 | * | |
2484 | * XXX: This doesn't check that there's not a manager thread outstanding, | |
2485 | * so it's based on the assumption that most manager callouts will change | |
2486 | * their QoS before parking. If that stops being true, this may end up | |
2487 | * costing us more than we gain. | |
2488 | */ | |
2489 | if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) && | |
2490 | tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){ | |
2491 | reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET)); | |
2492 | tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET; | |
f1a1da6c A |
2493 | } |
2494 | ||
2546420a A |
2495 | if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){ |
2496 | TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry); | |
2497 | } else { | |
2498 | TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry); | |
2499 | } | |
964d3577 | 2500 | |
2546420a A |
2501 | PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq, |
2502 | wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, 0); | |
964d3577 | 2503 | |
2546420a A |
2504 | /* |
2505 | * When we remove the voucher from the thread, we may lose our importance | |
2506 | * causing us to get preempted, so we do this after putting the thread on | |
2507 | * the idle list. That way, when we get our importance back we'll be able |
2508 | * to use this thread from e.g. the kevent call out to deliver a boosting | |
2509 | * message. | |
2510 | */ | |
2511 | workqueue_unlock(wq); | |
2512 | kern_return_t kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); | |
2513 | assert(kr == KERN_SUCCESS); | |
2514 | workqueue_lock_spin(wq); | |
2515 | ||
2516 | if ((tl->th_flags & TH_LIST_RUNNING) == 0) { | |
2517 | if (thidlecount < 101) { | |
2518 | us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount-2) * (wq_reduce_pool_window_usecs / 100)); | |
2519 | } else { | |
2520 | us_to_wait = wq_reduce_pool_window_usecs / 100; | |
2521 | } | |
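		/*
		 * The more idle threads are already parked, the less time this one
		 * gets before it can be reaped: with two idle threads we wait the
		 * full wq_reduce_pool_window_usecs, each additional idle thread
		 * trims roughly 1% off, and from 101 idle threads onward we wait
		 * only 1% of the window.
		 */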
2522 | ||
2523 | assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE), | |
2524 | TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait, | |
2525 | wq_reduce_pool_window_usecs/10, NSEC_PER_USEC); | |
2526 | ||
2527 | workqueue_unlock(wq); | |
2528 | ||
2529 | thread_block(wq_unpark_continue); | |
2530 | panic("thread_block(wq_unpark_continue) returned!"); | |
2531 | } else { | |
2532 | workqueue_unlock(wq); | |
2533 | ||
2534 | /* | |
2535 | * While we'd dropped the lock to unset our voucher, someone came | |
2536 | * around and made us runnable. But because we weren't waiting on the | |
2537 | * event their wakeup() was ineffectual. To correct for that, we just | |
2538 | * run the continuation ourselves. | |
2539 | */ | |
2540 | wq_unpark_continue(NULL, THREAD_AWAKENED); | |
2541 | } | |
964d3577 | 2542 | } |
f1a1da6c | 2543 | |
964d3577 A |
2544 | static boolean_t may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, uint32_t my_priclass, boolean_t *start_timer){ |
2545 | if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { | |
2546 | /* | |
2547 | * we need 1 or more constrained threads to return to the kernel before | |
2548 | * we can dispatch additional work | |
2549 | */ | |
2550 | return FALSE; | |
2551 | } | |
f1a1da6c | 2552 | |
964d3577 A |
2553 | uint32_t busycount = 0; |
2554 | uint32_t thactive_count = wq->wq_thactive_count[at_priclass]; | |
f1a1da6c | 2555 | |
964d3577 A |
2556 | // Has our most recently blocked thread blocked recently enough that we |
2557 | // should still consider it busy? | |
2546420a | 2558 | if (wq->wq_thscheduled_count[at_priclass] > wq->wq_thactive_count[at_priclass]) { |
964d3577 | 2559 | if (wq_thread_is_busy(mach_absolute_time(), &wq->wq_lastblocked_ts[at_priclass])) { |
f1a1da6c A |
2560 | busycount++; |
2561 | } | |
2562 | } | |
2563 | ||
964d3577 A |
2564 | if (my_priclass < WORKQUEUE_NUM_BUCKETS && my_priclass == at_priclass){ |
2565 | /* | |
2546420a | 2566 | * don't count this thread as currently active |
964d3577 A |
2567 | */ |
2568 | thactive_count--; | |
f1a1da6c | 2569 | } |
964d3577 | 2570 | |
f1a1da6c | 2571 | if (thactive_count + busycount >= wq->wq_max_concurrency) { |
964d3577 | 2572 | if (busycount && start_timer) { |
f1a1da6c A |
2573 | /* |
2574 | * we found at least 1 thread in the | |
2575 | * 'busy' state... make sure we start | |
2576 | * the timer because if they are the only | |
2577 | * threads keeping us from scheduling | |
2578 | * this work request, we won't get a callback | |
2579 | * to kick off the timer... we need to | |
2580 | * start it now... | |
2581 | */ | |
2546420a | 2582 | *start_timer = WQ_TIMER_DELAYED_NEEDED(wq); |
f1a1da6c A |
2583 | } |
2584 | ||
2546420a | 2585 | PTHREAD_TRACE_WQ(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, ((start_timer && *start_timer) ? 1 << _PTHREAD_PRIORITY_FLAGS_SHIFT : 0) | class_index_get_pthread_priority(at_priclass), thactive_count, busycount, 0); |
f1a1da6c | 2586 | |
964d3577 A |
2587 | return FALSE; |
2588 | } | |
2589 | return TRUE; | |
2590 | } | |
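/*
 * Worked example (hypothetical numbers): suppose wq_max_concurrency is 4,
 * three threads are active in the requested class, and one more blocked
 * recently enough to still count as busy. Then thactive_count + busycount
 * == 4 >= wq_max_concurrency, so the request is refused and, because
 * busycount is non-zero, *start_timer is set (when the caller passed one)
 * so the delayed add timer can retry once the busy thread resumes or has
 * clearly gone idle.
 */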
2591 | ||
2546420a A |
2592 | static struct threadlist * |
2593 | pop_from_thidlelist(struct workqueue *wq, uint32_t priclass) | |
2594 | { | |
2595 | assert(wq->wq_thidlecount); | |
964d3577 | 2596 | |
2546420a | 2597 | struct threadlist *tl = NULL; |
964d3577 | 2598 | |
2546420a A |
2599 | if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) && |
2600 | (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){ | |
2601 | tl = TAILQ_FIRST(&wq->wq_thidlemgrlist); | |
2602 | TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry); | |
2603 | assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET); | |
2604 | } else if (!TAILQ_EMPTY(&wq->wq_thidlelist) && | |
2605 | (priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){ | |
2606 | tl = TAILQ_FIRST(&wq->wq_thidlelist); | |
2607 | TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); | |
2608 | assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET); | |
2609 | } else { | |
2610 | panic("pop_from_thidlelist called with no threads available"); | |
964d3577 | 2611 | } |
2546420a A |
2612 | assert((tl->th_flags & TH_LIST_RUNNING) == 0); |
2613 | ||
2614 | assert(wq->wq_thidlecount); | |
2615 | wq->wq_thidlecount--; | |
2616 | ||
2617 | TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry); | |
2618 | ||
964d3577 A |
2619 | tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY; |
2620 | ||
2621 | wq->wq_threads_scheduled++; | |
2622 | wq->wq_thscheduled_count[priclass]++; | |
2623 | OSAddAtomic(1, &wq->wq_thactive_count[priclass]); | |
2624 | ||
2625 | return tl; | |
2626 | } | |
2627 | ||
2546420a A |
2628 | static pthread_priority_t |
2629 | pthread_priority_from_wq_class_index(struct workqueue *wq, int index){ | |
2630 | if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){ | |
2631 | return wq->wq_event_manager_priority; | |
2632 | } else { | |
2633 | return class_index_get_pthread_priority(index); | |
2634 | } | |
2635 | } | |
2636 | ||
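/*
 * Applies the requested pthread_priority_t to the kernel thread. Two paths:
 * a normal QoS-encoded priority goes through thread_set_workq_qos(), while a
 * priority carrying _PTHREAD_PRIORITY_SCHED_PRI_FLAG bypasses QoS and pins an
 * explicit scheduler priority (apparently for the event manager, judging by
 * the flag name). TH_LIST_EVENT_MGR_SCHED_PRI records which of the two states
 * the thread is currently in so the override can be undone on the next reset.
 */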
964d3577 | 2637 | static void |
2546420a | 2638 | reset_priority(struct threadlist *tl, pthread_priority_t pri){ |
964d3577 A |
2639 | kern_return_t ret; |
2640 | thread_t th = tl->th_thread; | |
2641 | ||
2546420a A |
2642 | if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){ |
2643 | ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0); | |
964d3577 | 2644 | assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); |
964d3577 | 2645 | |
2546420a | 2646 | if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) { |
964d3577 | 2647 | |
2546420a | 2648 | /* Reset priority to default (masked by QoS) */ |
964d3577 | 2649 | |
2546420a A |
2650 | ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE); |
2651 | assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); | |
964d3577 | 2652 | |
2546420a A |
2653 | tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI; |
2654 | } | |
2655 | } else { | |
2656 | ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0); | |
2657 | assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); | |
2658 | ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE); | |
2659 | assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); | |
964d3577 | 2660 | |
2546420a A |
2661 | tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI; |
2662 | } | |
964d3577 A |
2663 | } |
2664 | ||
2665 | /** | |
2666 | * grabs a thread for a request | |
2667 | * | |
2668 | * - called with the workqueue lock held... | |
2669 | * - responsible for dropping it in all cases | |
2670 | * - if provided mode is for overcommit, doesn't consume a reqcount | |
2671 | * | |
2672 | */ | |
2546420a | 2673 | static thread_t |
964d3577 | 2674 | workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread, |
2546420a A |
2675 | enum run_nextreq_mode mode, pthread_priority_t prio, |
2676 | bool kevent_bind_via_return) | |
964d3577 A |
2677 | { |
2678 | thread_t th_to_run = THREAD_NULL; | |
2546420a | 2679 | uint32_t upcall_flags = 0; |
964d3577 A |
2680 | uint32_t priclass; |
2681 | struct threadlist *tl = NULL; | |
2682 | struct uthread *uth = NULL; | |
2683 | boolean_t start_timer = FALSE; | |
2684 | ||
2546420a A |
2685 | if (mode == RUN_NEXTREQ_ADD_TIMER) { |
2686 | mode = RUN_NEXTREQ_DEFAULT; | |
2687 | } | |
2688 | ||
964d3577 | 2689 | // valid modes to call this function with |
2546420a A |
2690 | assert(mode == RUN_NEXTREQ_DEFAULT || mode == RUN_NEXTREQ_DEFAULT_KEVENT || |
2691 | mode == RUN_NEXTREQ_OVERCOMMIT || mode == RUN_NEXTREQ_UNCONSTRAINED || | |
2692 | mode == RUN_NEXTREQ_EVENT_MANAGER || mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT); | |
2693 | // may only have a priority if in OVERCOMMIT or DEFAULT_KEVENT mode | |
2694 | assert(mode == RUN_NEXTREQ_OVERCOMMIT || mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT || | |
2695 | mode == RUN_NEXTREQ_DEFAULT_KEVENT || prio == 0); | |
964d3577 A |
2696 | // thread == thread_null means "please spin up a new workqueue thread, we can't reuse this" |
2697 | // thread != thread_null is thread reuse, and must be the current thread | |
2698 | assert(thread == THREAD_NULL || thread == current_thread()); | |
2699 | ||
2546420a | 2700 | PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_START, wq, thread_tid(thread), wq->wq_thidlecount, wq->wq_reqcount, 0); |
964d3577 A |
2701 | |
2702 | if (thread != THREAD_NULL) { | |
2703 | uth = pthread_kern->get_bsdthread_info(thread); | |
2704 | ||
2705 | if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) { | |
2706 | panic("wq thread with no threadlist"); | |
f1a1da6c | 2707 | } |
964d3577 | 2708 | } |
f1a1da6c | 2709 | |
964d3577 | 2710 | /* |
2546420a A |
2711 | * from here until we drop the workq lock we can't be pre-empted since we |
2712 | * hold the lock in spin mode... this is important since we have to | |
2713 | * independently update the priority that the thread is associated with and | |
2714 | * the priority-based counters that "workqueue_callback" also changes and |
2715 | * bases decisions on. | |
964d3577 A |
2716 | */ |
2717 | ||
2546420a A |
2718 | /* |
2719 | * This giant monstrosity does three things: | |
2720 | * | |
2721 | * - adjusts the mode, if required | |
2722 | * - selects the priclass that we'll be servicing | |
2723 | * - sets any mode-specific upcall flags | |
2724 | * | |
2725 | * When possible special-cases should be handled here and converted into | |
2726 | * non-special cases. | |
2727 | */ | |
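/*
 * Rough map of the branches below: OVERCOMMIT and OVERCOMMIT_KEVENT take
 * their bucket from the caller-supplied priority; EVENT_MANAGER (explicit,
 * or deduced from a pending manager request) pins the manager bucket;
 * DEFAULT_KEVENT collapses into DEFAULT after noting the kevent flag; an
 * empty request count exits early; otherwise the highest-priority non-empty
 * bucket is chosen and any deferred-overcommit or kevent request found there
 * adjusts the mode and upcall flags accordingly.
 */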
964d3577 | 2728 | if (mode == RUN_NEXTREQ_OVERCOMMIT) { |
2546420a | 2729 | priclass = pthread_priority_get_class_index(prio); |
964d3577 | 2730 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; |
2546420a A |
2731 | } else if (mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT){ |
2732 | priclass = pthread_priority_get_class_index(prio); | |
2733 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
2734 | } else if (mode == RUN_NEXTREQ_EVENT_MANAGER){ | |
2735 | assert(wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0); | |
2736 | priclass = WORKQUEUE_EVENT_MANAGER_BUCKET; | |
2737 | upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; | |
2738 | if (wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET]){ | |
2739 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
2740 | } | |
964d3577 A |
2741 | } else if (wq->wq_reqcount == 0){ |
2742 | // no work to do. we'll check again when new work arrives. | |
2743 | goto done; | |
2546420a A |
2744 | } else if (mode == RUN_NEXTREQ_DEFAULT_KEVENT) { |
2745 | assert(kevent_bind_via_return); | |
2746 | ||
2747 | priclass = pthread_priority_get_class_index(prio); | |
2748 | assert(priclass < WORKQUEUE_EVENT_MANAGER_BUCKET); | |
2749 | assert(wq->wq_kevent_requests[priclass] > 0); | |
2750 | ||
2751 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
2752 | mode = RUN_NEXTREQ_DEFAULT; | |
964d3577 A |
2753 | } else if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] && |
2754 | ((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) || | |
2755 | (thread != THREAD_NULL && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))){ | |
2756 | // There's an event manager request and either: | |
2757 | // - no event manager currently running | |
2758 | // - we are re-using the event manager | |
2759 | mode = RUN_NEXTREQ_EVENT_MANAGER; | |
2760 | priclass = WORKQUEUE_EVENT_MANAGER_BUCKET; | |
2761 | upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; | |
2546420a | 2762 | if (wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET]){ |
964d3577 | 2763 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; |
2546420a | 2764 | } |
964d3577 A |
2765 | } else { |
2766 | // Find highest priority and check for special request types | |
2767 | for (priclass = 0; priclass < WORKQUEUE_EVENT_MANAGER_BUCKET; priclass++) { | |
2768 | if (wq->wq_requests[priclass]) | |
2769 | break; | |
2770 | } | |
2771 | if (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET){ | |
2772 | // the only remaining request must be the event manager's (its bucket isn't scanned above), |
2773 | // but we couldn't handle it because an event manager is already running, |
2774 | // so we fell into this case |
2775 | assert(wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 1 && | |
2776 | wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 1 && | |
2777 | wq->wq_reqcount == 1); | |
2778 | goto done; | |
2779 | } | |
2780 | ||
2781 | if (wq->wq_kevent_ocrequests[priclass]){ | |
2782 | mode = RUN_NEXTREQ_DEFERRED_OVERCOMMIT; | |
2783 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
2784 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; | |
2785 | } else if (wq->wq_ocrequests[priclass]){ | |
2786 | mode = RUN_NEXTREQ_DEFERRED_OVERCOMMIT; | |
2787 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; | |
2788 | } else if (wq->wq_kevent_requests[priclass]){ | |
2789 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
2790 | } | |
2791 | } | |
2792 | ||
2546420a A |
2793 | assert(mode != RUN_NEXTREQ_EVENT_MANAGER || priclass == WORKQUEUE_EVENT_MANAGER_BUCKET); |
2794 | assert(mode == RUN_NEXTREQ_EVENT_MANAGER || priclass != WORKQUEUE_EVENT_MANAGER_BUCKET); | |
2795 | ||
964d3577 A |
2796 | if (mode == RUN_NEXTREQ_DEFAULT /* non-overcommit */){ |
2797 | uint32_t my_priclass = (thread != THREAD_NULL) ? tl->th_priority : WORKQUEUE_NUM_BUCKETS; | |
2798 | if (may_start_constrained_thread(wq, priclass, my_priclass, &start_timer) == FALSE){ | |
2799 | // per policy, we won't start another constrained thread | |
2800 | goto done; | |
2801 | } | |
f1a1da6c A |
2802 | } |
2803 | ||
2804 | if (thread != THREAD_NULL) { | |
2805 | /* | |
2806 | * thread is non-NULL here when we return from userspace | |
2807 | * in workq_kernreturn, rather than trying to find a thread | |
2808 | * we pick up new work for this specific thread. | |
2809 | */ | |
2810 | th_to_run = thread; | |
2546420a | 2811 | upcall_flags |= WQ_FLAG_THREAD_REUSE; |
964d3577 | 2812 | } else if (wq->wq_thidlecount == 0) { |
f1a1da6c A |
2813 | /* |
2814 | * we have no additional threads waiting to pick up | |
2815 | * work, however, there is additional work to do. | |
2816 | */ | |
2546420a | 2817 | start_timer = WQ_TIMER_DELAYED_NEEDED(wq); |
f1a1da6c | 2818 | |
2546420a | 2819 | PTHREAD_TRACE_WQ(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0); |
f1a1da6c | 2820 | |
964d3577 A |
2821 | goto done; |
2822 | } else { | |
2546420a A |
2823 | // there is both work available and an idle thread, so activate a thread |
2824 | tl = pop_from_thidlelist(wq, priclass); | |
2825 | th_to_run = tl->th_thread; | |
f1a1da6c A |
2826 | } |
2827 | ||
964d3577 A |
2828 | // Adjust counters and thread flags AKA consume the request |
2829 | // TODO: It would be lovely if OVERCOMMIT consumed reqcount | |
2830 | switch (mode) { | |
2831 | case RUN_NEXTREQ_DEFAULT: | |
2546420a A |
2832 | case RUN_NEXTREQ_DEFAULT_KEVENT: /* actually mapped to DEFAULT above */ |
2833 | case RUN_NEXTREQ_ADD_TIMER: /* actually mapped to DEFAULT above */ | |
964d3577 A |
2834 | case RUN_NEXTREQ_UNCONSTRAINED: |
2835 | wq->wq_reqcount--; | |
2836 | wq->wq_requests[priclass]--; | |
f1a1da6c | 2837 | |
964d3577 A |
2838 | if (mode == RUN_NEXTREQ_DEFAULT){ |
2839 | if (!(tl->th_flags & TH_LIST_CONSTRAINED)) { | |
2840 | wq->wq_constrained_threads_scheduled++; | |
2841 | tl->th_flags |= TH_LIST_CONSTRAINED; | |
2842 | } | |
2843 | } else if (mode == RUN_NEXTREQ_UNCONSTRAINED){ | |
2844 | if (tl->th_flags & TH_LIST_CONSTRAINED) { | |
964d3577 A |
2845 | wq->wq_constrained_threads_scheduled--; |
2846 | tl->th_flags &= ~TH_LIST_CONSTRAINED; | |
2847 | } | |
2848 | } | |
2849 | if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ | |
2850 | wq->wq_kevent_requests[priclass]--; | |
2851 | } | |
2852 | break; | |
f1a1da6c | 2853 | |
964d3577 A |
2854 | case RUN_NEXTREQ_EVENT_MANAGER: |
2855 | wq->wq_reqcount--; | |
2856 | wq->wq_requests[priclass]--; | |
f1a1da6c | 2857 | |
964d3577 A |
2858 | if (tl->th_flags & TH_LIST_CONSTRAINED) { |
2859 | wq->wq_constrained_threads_scheduled--; | |
2860 | tl->th_flags &= ~TH_LIST_CONSTRAINED; | |
2861 | } | |
2862 | if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ | |
2863 | wq->wq_kevent_requests[priclass]--; | |
2864 | } | |
2865 | break; | |
f1a1da6c | 2866 | |
964d3577 A |
2867 | case RUN_NEXTREQ_DEFERRED_OVERCOMMIT: |
2868 | wq->wq_reqcount--; | |
2869 | wq->wq_requests[priclass]--; | |
2870 | if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ | |
2871 | wq->wq_kevent_ocrequests[priclass]--; | |
2872 | } else { | |
2873 | wq->wq_ocrequests[priclass]--; | |
2874 | } | |
2875 | /* FALLTHROUGH */ | |
2876 | case RUN_NEXTREQ_OVERCOMMIT: | |
2546420a | 2877 | case RUN_NEXTREQ_OVERCOMMIT_KEVENT: |
964d3577 A |
2878 | if (tl->th_flags & TH_LIST_CONSTRAINED) { |
2879 | wq->wq_constrained_threads_scheduled--; | |
2880 | tl->th_flags &= ~TH_LIST_CONSTRAINED; | |
2881 | } | |
2882 | break; | |
f1a1da6c A |
2883 | } |
2884 | ||
964d3577 A |
2885 | // Confirm we've maintained our counter invariants |
2886 | assert(wq->wq_requests[priclass] < UINT16_MAX); | |
2887 | assert(wq->wq_ocrequests[priclass] < UINT16_MAX); | |
2888 | assert(wq->wq_kevent_requests[priclass] < UINT16_MAX); | |
2889 | assert(wq->wq_kevent_ocrequests[priclass] < UINT16_MAX); | |
2890 | assert(wq->wq_ocrequests[priclass] + wq->wq_kevent_requests[priclass] + | |
2891 | wq->wq_kevent_ocrequests[priclass] <= | |
2892 | wq->wq_requests[priclass]); | |
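// In other words: overcommit, kevent, and kevent-overcommit requests are all
// sub-counts carved out of wq_requests[priclass], so their sum can never
// exceed the bucket's total outstanding requests.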
2893 | ||
2546420a A |
2894 | assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0); |
2895 | if (upcall_flags & WQ_FLAG_THREAD_KEVENT) { | |
2896 | tl->th_flags |= TH_LIST_KEVENT; | |
2897 | } else { | |
2898 | tl->th_flags &= ~TH_LIST_KEVENT; | |
2899 | } | |
2900 | ||
964d3577 | 2901 | uint32_t orig_class = tl->th_priority; |
f1a1da6c A |
2902 | tl->th_priority = (uint8_t)priclass; |
2903 | ||
964d3577 | 2904 | if ((thread != THREAD_NULL) && (orig_class != priclass)) { |
f1a1da6c A |
2905 | /* |
2906 | * we need to adjust these counters based on this | |
2907 | * thread's new disposition with respect to priority |
2908 | */ | |
2909 | OSAddAtomic(-1, &wq->wq_thactive_count[orig_class]); | |
2910 | OSAddAtomic(1, &wq->wq_thactive_count[priclass]); | |
2911 | ||
2912 | wq->wq_thscheduled_count[orig_class]--; | |
2913 | wq->wq_thscheduled_count[priclass]++; | |
2914 | } | |
2915 | wq->wq_thread_yielded_count = 0; | |
2916 | ||
2546420a A |
2917 | pthread_priority_t outgoing_priority = pthread_priority_from_wq_class_index(wq, tl->th_priority); |
2918 | PTHREAD_TRACE_WQ(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), outgoing_priority, 0, 0); | |
2919 | reset_priority(tl, outgoing_priority); | |
2920 | PTHREAD_TRACE_WQ(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), outgoing_priority, 0, 0); | |
f1a1da6c | 2921 | |
2546420a A |
2922 | /* |
2923 | * persist upcall_flags so that it can be retrieved in _setup_wqthread |
2924 | */ | |
2925 | tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT; | |
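// Only the flag bits are stashed (shifted down), because the low
// WQ_FLAG_THREAD_PRIOSHIFT bits of upcall_flags are reserved for the
// priority. Sketch of the round trip, using the code in this file:
//     tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT;
//     /* ...later, in _setup_wqthread()... */
//     upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT;
//     upcall_flags |= (_pthread_priority_get_qos_newest(priority) &
//                      WQ_FLAG_THREAD_PRIOMASK);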
f1a1da6c A |
2926 | |
2927 | /* | |
2928 | * if current thread is reused for work request, does not return via unix_syscall | |
2929 | */ | |
2546420a A |
2930 | wq_runreq(p, th_to_run, wq, tl, (thread == th_to_run), |
2931 | (upcall_flags & WQ_FLAG_THREAD_KEVENT) && !kevent_bind_via_return); | |
f1a1da6c | 2932 | |
2546420a | 2933 | PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), mode == RUN_NEXTREQ_OVERCOMMIT, 1, 0); |
f1a1da6c | 2934 | |
2546420a A |
2935 | assert(!kevent_bind_via_return || (upcall_flags & WQ_FLAG_THREAD_KEVENT)); |
2936 | if (kevent_bind_via_return && (upcall_flags & WQ_FLAG_THREAD_KEVENT)) { | |
2937 | tl->th_flags |= TH_LIST_KEVENT_BOUND; | |
f1a1da6c | 2938 | } |
f1a1da6c | 2939 | |
2546420a A |
2940 | workqueue_unlock(wq); |
2941 | ||
2942 | return th_to_run; | |
f1a1da6c | 2943 | |
2546420a | 2944 | done: |
f1a1da6c A |
2945 | if (start_timer) |
2946 | workqueue_interval_timer_start(wq); | |
2947 | ||
2546420a | 2948 | PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), start_timer, 3, 0); |
f1a1da6c | 2949 | |
964d3577 | 2950 | if (thread != THREAD_NULL){ |
2546420a | 2951 | parkit(wq, tl, thread); |
964d3577 A |
2952 | /* NOT REACHED */ |
2953 | } | |
f1a1da6c | 2954 | |
2546420a A |
2955 | workqueue_unlock(wq); |
2956 | ||
2957 | return THREAD_NULL; | |
f1a1da6c A |
2958 | } |
2959 | ||
964d3577 | 2960 | /** |
2546420a | 2961 | * parked thread wakes up |
964d3577 | 2962 | */ |
2546420a A |
2963 | static void __dead2 |
2964 | wq_unpark_continue(void* __unused ptr, wait_result_t wait_result) | |
f1a1da6c | 2965 | { |
2546420a A |
2966 | boolean_t first_use = false; |
2967 | thread_t th = current_thread(); | |
2968 | proc_t p = current_proc(); | |
f1a1da6c | 2969 | |
2546420a A |
2970 | struct uthread *uth = pthread_kern->get_bsdthread_info(th); |
2971 | if (uth == NULL) goto done; | |
f1a1da6c | 2972 | |
2546420a A |
2973 | struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); |
2974 | if (tl == NULL) goto done; | |
f1a1da6c | 2975 | |
2546420a | 2976 | struct workqueue *wq = tl->th_workq; |
f1a1da6c | 2977 | |
2546420a | 2978 | workqueue_lock_spin(wq); |
f1a1da6c | 2979 | |
2546420a | 2980 | assert(tl->th_flags & TH_LIST_INITED); |
f1a1da6c | 2981 | |
2546420a A |
2982 | if ((tl->th_flags & TH_LIST_NEW)){ |
2983 | tl->th_flags &= ~(TH_LIST_NEW); | |
2984 | first_use = true; | |
2985 | } | |
2986 | ||
2987 | if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) { | |
f1a1da6c | 2988 | /* |
2546420a | 2989 | * The normal wakeup path. |
f1a1da6c | 2990 | */ |
2546420a | 2991 | goto return_to_user; |
f1a1da6c | 2992 | } |
f1a1da6c | 2993 | |
2546420a A |
2994 | if ((tl->th_flags & TH_LIST_RUNNING) == 0 && |
2995 | wait_result == THREAD_TIMED_OUT && | |
2996 | tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET && | |
2997 | TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl && | |
2998 | TAILQ_NEXT(tl, th_entry) == NULL){ | |
2999 | /* | |
3000 | * If we are the only idle manager and we popped for self-destruction, |
3001 | * then don't actually exit. Instead, free our stack to save some | |
3002 | * memory and re-park. | |
3003 | */ | |
f1a1da6c | 3004 | |
2546420a | 3005 | workqueue_unlock(wq); |
f1a1da6c | 3006 | |
2546420a | 3007 | vm_map_t vmap = wq->wq_map; |
f1a1da6c | 3008 | |
2546420a A |
3009 | // Keep this in sync with _setup_wqthread() |
3010 | const vm_size_t guardsize = vm_map_page_size(vmap); | |
3011 | const user_addr_t freeaddr = (user_addr_t)tl->th_stackaddr + guardsize; | |
3012 | const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize; | |
3013 | ||
3014 | int kr; | |
3015 | kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE); | |
3016 | assert(kr == KERN_SUCCESS || kr == KERN_INVALID_ADDRESS); | |
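/*
 * VM_BEHAVIOR_REUSABLE marks the idle manager's stack pages as discardable
 * (roughly the kernel-side analogue of madvise(MADV_FREE_REUSABLE)): the
 * mapping stays valid but the physical pages may be reclaimed until they are
 * touched again, which is what makes re-parking cheaper than exiting and
 * later re-creating the thread.
 */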
f1a1da6c | 3017 | |
2546420a | 3018 | workqueue_lock_spin(wq); |
f1a1da6c | 3019 | |
964d3577 | 3020 | if ( !(tl->th_flags & TH_LIST_RUNNING)) { |
2546420a A |
3021 | assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE)); |
3022 | ||
3023 | workqueue_unlock(wq); | |
3024 | ||
3025 | thread_block(wq_unpark_continue); | |
3026 | /* NOT REACHED */ | |
964d3577 | 3027 | } |
2546420a | 3028 | } |
f1a1da6c | 3029 | |
2546420a A |
3030 | if ((tl->th_flags & TH_LIST_RUNNING) == 0) { |
3031 | assert((tl->th_flags & TH_LIST_BUSY) == 0); | |
964d3577 | 3032 | /* |
2546420a A |
3033 | * We were set running, but not for the purposes of actually running. |
3034 | * This could be because the timer elapsed. Or it could be because the | |
3035 | * thread aborted. Either way, we need to return to userspace to exit. | |
3036 | * | |
3037 | * The call to workqueue_removethread will consume the lock. | |
964d3577 | 3038 | */ |
f1a1da6c | 3039 | |
2546420a A |
3040 | if (!first_use && |
3041 | tl->th_priority != qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS)) { | |
3042 | // Reset the QoS to something low for the pthread cleanup | |
3043 | pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0); | |
3044 | reset_priority(tl, cleanup_pri); | |
3045 | } | |
f1a1da6c | 3046 | |
2546420a | 3047 | workqueue_removethread(tl, 0, first_use); |
964d3577 | 3048 | |
2546420a A |
3049 | if (first_use){ |
3050 | pthread_kern->thread_bootstrap_return(); | |
3051 | } else { | |
3052 | pthread_kern->unix_syscall_return(0); | |
f1a1da6c | 3053 | } |
2546420a | 3054 | /* NOT REACHED */ |
f1a1da6c | 3055 | } |
964d3577 | 3056 | |
2546420a A |
3057 | /* |
3058 | * The timer woke us up or the thread was aborted. However, we have | |
3059 | * already started to make this a runnable thread. Wait for that to | |
3060 | * finish, then continue to userspace. | |
3061 | */ | |
3062 | while ((tl->th_flags & TH_LIST_BUSY)) { | |
3063 | assert_wait((caddr_t)tl, (THREAD_UNINT)); | |
964d3577 | 3064 | |
2546420a | 3065 | workqueue_unlock(wq); |
964d3577 | 3066 | |
2546420a | 3067 | thread_block(THREAD_CONTINUE_NULL); |
f1a1da6c | 3068 | |
2546420a A |
3069 | workqueue_lock_spin(wq); |
3070 | } | |
f1a1da6c | 3071 | |
2546420a A |
3072 | return_to_user: |
3073 | workqueue_unlock(wq); | |
3074 | _setup_wqthread(p, th, wq, tl, first_use); | |
3075 | pthread_kern->thread_sched_call(th, workqueue_callback); | |
3076 | done: | |
3077 | if (first_use){ | |
3078 | pthread_kern->thread_bootstrap_return(); | |
3079 | } else { | |
3080 | pthread_kern->unix_syscall_return(EJUSTRETURN); | |
3081 | } | |
3082 | panic("Our attempt to return to userspace failed..."); | |
3083 | } | |
f1a1da6c | 3084 | |
2546420a A |
3085 | /* called with workqueue lock held */ |
3086 | static void | |
3087 | wq_runreq(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl, | |
3088 | boolean_t return_directly, boolean_t needs_kevent_bind) | |
f1a1da6c | 3089 | { |
2546420a | 3090 | PTHREAD_TRACE1_WQ(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th)); |
f1a1da6c | 3091 | |
2546420a A |
3092 | unsigned int kevent_flags = KEVENT_FLAG_WORKQ; |
3093 | if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { | |
3094 | kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER; | |
3095 | } | |
f1a1da6c A |
3096 | |
3097 | if (return_directly) { | |
2546420a A |
3098 | if (needs_kevent_bind) { |
3099 | assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0); | |
3100 | tl->th_flags |= TH_LIST_KEVENT_BOUND; | |
3101 | } | |
f1a1da6c | 3102 | |
2546420a | 3103 | workqueue_unlock(wq); |
f1a1da6c | 3104 | |
2546420a A |
3105 | if (needs_kevent_bind) { |
3106 | kevent_qos_internal_bind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags); | |
3107 | } | |
3108 | ||
3109 | /* | |
3110 | * For preemption reasons, we want to reset the voucher as late as | |
3111 | * possible, so we do it in two places: | |
3112 | * - Just before parking (i.e. in parkit()) | |
3113 | * - Prior to doing the setup for the next workitem (i.e. here) | |
3114 | * | |
3115 | * Those two places are sufficient to ensure we always reset it before | |
3116 | * it goes back out to user space, but be careful to not break that | |
3117 | * guarantee. | |
3118 | */ | |
3119 | kern_return_t kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); | |
3120 | assert(kr == KERN_SUCCESS); | |
f1a1da6c | 3121 | |
2546420a | 3122 | _setup_wqthread(p, th, wq, tl, false); |
f1a1da6c | 3123 | |
2546420a | 3124 | PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0); |
f1a1da6c | 3125 | |
2546420a A |
3126 | pthread_kern->unix_syscall_return(EJUSTRETURN); |
3127 | /* NOT REACHED */ | |
3128 | } | |
f1a1da6c | 3129 | |
2546420a A |
3130 | if (needs_kevent_bind) { |
3131 | // Leave TH_LIST_BUSY set so that the thread can't beat us to calling kevent | |
3132 | workqueue_unlock(wq); | |
3133 | assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0); | |
3134 | kevent_qos_internal_bind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags); | |
3135 | tl->th_flags |= TH_LIST_KEVENT_BOUND; | |
3136 | workqueue_lock_spin(wq); | |
f1a1da6c | 3137 | } |
2546420a A |
3138 | tl->th_flags &= ~(TH_LIST_BUSY); |
3139 | thread_wakeup_thread(tl, th); |
f1a1da6c A |
3140 | } |
3141 | ||
2546420a | 3142 | #define KEVENT_LIST_LEN 16 // WORKQ_KEVENT_EVENT_BUFFER_LEN |
964d3577 | 3143 | #define KEVENT_DATA_SIZE (32 * 1024) |
f1a1da6c | 3144 | |
964d3577 A |
3145 | /** |
3146 | * configures initial thread stack/registers to jump into: | |
2546420a | 3147 | * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents); |
964d3577 A |
3148 | * to get there we jump through assembly stubs in pthread_asm.s. Those |
3149 | * routines set up a stack frame, using the current stack pointer, and marshal |
3150 | * arguments from registers to the stack as required by the ABI. | |
3151 | * | |
3152 | * One odd thing we do here is to start the pthread_t 4k below what would be the | |
3153 | * top of the stack otherwise. This is because usually only the first 4k of the | |
3154 | * pthread_t will be used and so we want to put it on the same 16k page as the | |
3155 | * top of the stack to save memory. | |
3156 | * | |
3157 | * When we are done the stack will look like: | |
3158 | * |-----------| th_stackaddr + th_allocsize | |
3159 | * |pthread_t  | th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET |
3160 | * |kevent list| optionally - at most KEVENT_LIST_LEN events | |
3161 | * |kevent data| optionally - at most KEVENT_DATA_SIZE bytes | |
3162 | * |stack gap | bottom aligned to 16 bytes, and at least as big as stack_gap_min | |
3163 | * | STACK | | |
3164 | * | ⇓ | | |
3165 | * | | | |
3166 | * |guard page | guardsize | |
3167 | * |-----------| th_stackaddr | |
3168 | */ | |
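/*
 * Worked example (hypothetical sizes, for orientation only): with 16 KB
 * pages, guardsize == 16 KB; with a 512 KB PTH_DEFAULT_STACKSIZE the
 * pthread_t lands at th_stackaddr + 16 KB + 512 KB + PTHREAD_T_OFFSET, the
 * usable stack top sits just below that (rounded down for the red zone and
 * alignment), and the kevent list and data buffer, when present, are carved
 * out of the same region immediately beneath the pthread_t.
 */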
2546420a A |
3169 | void |
3170 | _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl, | |
3171 | bool first_use) | |
f1a1da6c | 3172 | { |
2546420a A |
3173 | int error; |
3174 | uint32_t upcall_flags; | |
3175 | ||
3176 | pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority); | |
f1a1da6c | 3177 | |
964d3577 A |
3178 | const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map); |
3179 | const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN; | |
3180 | const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN; | |
3181 | ||
3182 | user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET); | |
3183 | user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min); | |
3184 | user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize); | |
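// Note: `& -stack_align_min` rounds an address down to the requested
// alignment because stack_align_min is a power of two, so its negation is a
// mask with the low bits cleared, e.g. (0x1234 & -16) == 0x1230
// (illustrative values only).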
f1a1da6c | 3185 | |
2546420a A |
3186 | user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p); |
3187 | if (!wqstart_fnptr) { | |
3188 | panic("workqueue thread start function pointer is NULL"); | |
3189 | } | |
3190 | ||
f1a1da6c A |
3191 | /* Put the QoS class value into the lower bits of the reuse_thread register; this is where |
3192 | * the thread priority used to be stored anyway. | |
3193 | */ | |
2546420a A |
3194 | upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT; |
3195 | upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK); | |
f1a1da6c | 3196 | |
2546420a A |
3197 | upcall_flags |= WQ_FLAG_THREAD_NEWSPI; |
3198 | ||
3199 | uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); | |
3200 | if (tsd_offset) { | |
3201 | mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset; | |
3202 | kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base); | |
3203 | if (kret == KERN_SUCCESS) { | |
3204 | upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET; | |
3205 | } | |
3206 | } | |
3207 | ||
3208 | if (first_use) { | |
3209 | /* | |
3210 | * Pre-fault the first page of the new thread's stack and the page that will | |
3211 | * contain the pthread_t structure. | |
3212 | */ | |
3213 | vm_map_t vmap = pthread_kern->current_map(); | |
3214 | if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) != | |
3215 | vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){ | |
3216 | vm_fault( vmap, | |
3217 | vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)), | |
3218 | VM_PROT_READ | VM_PROT_WRITE, | |
3219 | FALSE, | |
3220 | THREAD_UNINT, NULL, 0); | |
3221 | } | |
3222 | vm_fault( vmap, | |
3223 | vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)), | |
3224 | VM_PROT_READ | VM_PROT_WRITE, | |
3225 | FALSE, | |
3226 | THREAD_UNINT, NULL, 0); | |
3227 | } else { | |
3228 | upcall_flags |= WQ_FLAG_THREAD_REUSE; | |
3229 | } | |
f1a1da6c | 3230 | |
964d3577 A |
3231 | user_addr_t kevent_list = NULL; |
3232 | int kevent_count = 0; | |
2546420a | 3233 | if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ |
964d3577 A |
3234 | kevent_list = pthread_self_addr - KEVENT_LIST_LEN * sizeof(struct kevent_qos_s); |
3235 | kevent_count = KEVENT_LIST_LEN; | |
f1a1da6c | 3236 | |
964d3577 A |
3237 | user_addr_t kevent_data_buf = kevent_list - KEVENT_DATA_SIZE; |
3238 | user_size_t kevent_data_available = KEVENT_DATA_SIZE; | |
f1a1da6c | 3239 | |
964d3577 | 3240 | int32_t events_out = 0; |
f1a1da6c | 3241 | |
2546420a A |
3242 | assert(tl->th_flags & TH_LIST_KEVENT_BOUND); |
3243 | unsigned int flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE; | |
3244 | if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { | |
3245 | flags |= KEVENT_FLAG_WORKQ_MANAGER; | |
3246 | } | |
3247 | int ret = kevent_qos_internal(p, class_index_get_thread_qos(tl->th_priority), NULL, 0, kevent_list, kevent_count, | |
964d3577 | 3248 | kevent_data_buf, &kevent_data_available, |
2546420a | 3249 | flags, &events_out); |
f1a1da6c | 3250 | |
2546420a A |
3251 | // turns out there are a lot of edge cases where this will fail, so not enabled by default |
3252 | //assert((ret == KERN_SUCCESS && events_out != -1) || ret == KERN_ABORTED); | |
3253 | ||
3254 | // squash any errors into just empty output |
964d3577 A |
3255 | if (ret != KERN_SUCCESS || events_out == -1){ |
3256 | events_out = 0; | |
3257 | kevent_data_available = KEVENT_DATA_SIZE; | |
3258 | } | |
3259 | ||
3260 | // We shouldn't get data out if there aren't events available | |
3261 | assert(events_out != 0 || kevent_data_available == KEVENT_DATA_SIZE); | |
3262 | ||
2546420a | 3263 | if (events_out > 0){ |
964d3577 A |
3264 | if (kevent_data_available == KEVENT_DATA_SIZE){ |
3265 | stack_top_addr = (kevent_list - stack_gap_min) & -stack_align_min; | |
3266 | } else { | |
3267 | stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min; | |
3268 | } | |
2546420a A |
3269 | |
3270 | kevent_count = events_out; | |
964d3577 A |
3271 | } else { |
3272 | kevent_list = NULL; | |
3273 | kevent_count = 0; | |
3274 | } | |
3275 | } | |
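// At this point any events harvested by kevent_qos_internal() already live
// in the kevent list/data area just below the pthread_t, so the thread's
// initial stack pointer is simply moved underneath them; kevent_count
// (passed as the nkevents argument) tells _pthread_wqthread() how many
// entries were delivered.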
3276 | ||
3277 | #if defined(__i386__) || defined(__x86_64__) | |
2546420a | 3278 | if (proc_is64bit(p) == 0) { |
964d3577 | 3279 | x86_thread_state32_t state = { |
2546420a | 3280 | .eip = (unsigned int)wqstart_fnptr, |
964d3577 A |
3281 | .eax = /* arg0 */ (unsigned int)pthread_self_addr, |
3282 | .ebx = /* arg1 */ (unsigned int)tl->th_thport, | |
3283 | .ecx = /* arg2 */ (unsigned int)stack_bottom_addr, | |
3284 | .edx = /* arg3 */ (unsigned int)kevent_list, | |
2546420a | 3285 | .edi = /* arg4 */ (unsigned int)upcall_flags, |
964d3577 A |
3286 | .esi = /* arg5 */ (unsigned int)kevent_count, |
3287 | ||
3288 | .esp = (int)((vm_offset_t)stack_top_addr), | |
3289 | }; | |
3290 | ||
2546420a A |
3291 | error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); |
3292 | if (error != KERN_SUCCESS) { | |
3293 | panic("%s: thread_set_wq_state failed: %d", __func__, error); |
3294 | } | |
964d3577 A |
3295 | } else { |
3296 | x86_thread_state64_t state64 = { | |
3297 | // x86-64 already passes all the arguments in registers, so we just put them in their final place here | |
2546420a | 3298 | .rip = (uint64_t)wqstart_fnptr, |
964d3577 A |
3299 | .rdi = (uint64_t)pthread_self_addr, |
3300 | .rsi = (uint64_t)tl->th_thport, | |
3301 | .rdx = (uint64_t)stack_bottom_addr, | |
3302 | .rcx = (uint64_t)kevent_list, | |
2546420a | 3303 | .r8 = (uint64_t)upcall_flags, |
964d3577 A |
3304 | .r9 = (uint64_t)kevent_count, |
3305 | ||
3306 | .rsp = (uint64_t)(stack_top_addr) | |
3307 | }; | |
3308 | ||
3309 | error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); | |
f1a1da6c | 3310 | if (error != KERN_SUCCESS) { |
2546420a | 3311 | panic("%s: thread_set_wq_state failed: %d", __func__, error); |
f1a1da6c A |
3312 | } |
3313 | } | |
3314 | #else | |
3315 | #error setup_wqthread not defined for this architecture | |
3316 | #endif | |
f1a1da6c A |
3317 | } |
3318 | ||
964d3577 A |
3319 | #if DEBUG |
3320 | static int wq_kevent_test SYSCTL_HANDLER_ARGS { | |
3321 | //(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) | |
3322 | #pragma unused(oidp, arg1, arg2) | |
3323 | int error; | |
3324 | struct workq_reqthreads_req_s requests[64] = {}; | |
3325 | ||
3326 | if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s)) | |
3327 | return EINVAL; | |
3328 | ||
3329 | error = copyin(req->newptr, requests, req->newlen); | |
3330 | if (error) return error; | |
3331 | ||
3332 | _workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests); | |
3333 | ||
3334 | return 0; | |
3335 | } | |
3336 | #endif // DEBUG | |
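/*
 * On DEBUG kernels this handler is reachable through the sysctl registered
 * as sysctl__debug_wq_kevent_test in _pthread_init() below (so presumably
 * named debug.wq_kevent_test); writing an array of
 * struct workq_reqthreads_req_s to it drives _workq_reqthreads() directly,
 * bypassing the normal workq_kernreturn path.
 */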
3337 | ||
3338 | #pragma mark - Misc | |
3339 | ||
f1a1da6c A |
3340 | int |
3341 | _fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo) | |
3342 | { | |
3343 | struct workqueue * wq; | |
3344 | int error = 0; | |
3345 | int activecount; | |
3346 | uint32_t pri; | |
3347 | ||
f1a1da6c | 3348 | if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) { |
2546420a | 3349 | return EINVAL; |
f1a1da6c | 3350 | } |
2546420a A |
3351 | |
3352 | workqueue_lock_spin(wq); | |
f1a1da6c A |
3353 | activecount = 0; |
3354 | ||
3355 | for (pri = 0; pri < WORKQUEUE_NUM_BUCKETS; pri++) { | |
3356 | activecount += wq->wq_thactive_count[pri]; | |
3357 | } | |
3358 | pwqinfo->pwq_nthreads = wq->wq_nthreads; | |
3359 | pwqinfo->pwq_runthreads = activecount; | |
3360 | pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount; | |
3361 | pwqinfo->pwq_state = 0; | |
3362 | ||
2546420a | 3363 | if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { |
f1a1da6c A |
3364 | pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; |
3365 | } | |
3366 | ||
2546420a | 3367 | if (wq->wq_nthreads >= wq_max_threads) { |
f1a1da6c A |
3368 | pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; |
3369 | } | |
3370 | ||
2546420a | 3371 | workqueue_unlock(wq); |
f1a1da6c A |
3372 | return(error); |
3373 | } | |
3374 | ||
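/*
 * kdp variant of the state snapshot above: it runs in debugger/stackshot
 * context where blocking is not an option, so instead of taking the workqueue
 * lock it bails out with 0 whenever the lock is already held
 * (workqueue_lock_spin_is_acquired_kdp), at the cost of occasionally
 * reporting no flags for a healthy process.
 */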
2546420a A |
3375 | uint32_t |
3376 | _get_pwq_state_kdp(proc_t p) | |
3377 | { | |
3378 | if (p == NULL) { | |
3379 | return 0; | |
3380 | } | |
3381 | ||
3382 | struct workqueue *wq = pthread_kern->proc_get_wqptr(p); | |
3383 | ||
3384 | if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) { | |
3385 | return 0; | |
3386 | } | |
3387 | ||
3388 | uint32_t pwq_state = WQ_FLAGS_AVAILABLE; | |
3389 | ||
3390 | if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { | |
3391 | pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; | |
3392 | } | |
3393 | ||
3394 | if (wq->wq_nthreads >= wq_max_threads) { | |
3395 | pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; | |
3396 | } | |
3397 | ||
3398 | return pwq_state; | |
3399 | } | |
3400 | ||
f1a1da6c A |
3401 | int |
3402 | _thread_selfid(__unused struct proc *p, uint64_t *retval) | |
3403 | { | |
3404 | thread_t thread = current_thread(); | |
3405 | *retval = thread_tid(thread); | |
3406 | return KERN_SUCCESS; | |
3407 | } | |
3408 | ||
3409 | void | |
3410 | _pthread_init(void) | |
3411 | { | |
3412 | pthread_lck_grp_attr = lck_grp_attr_alloc_init(); | |
3413 | pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr); | |
3414 | ||
3415 | /* | |
3416 | * allocate the lock attribute for pthread synchronizers | |
3417 | */ | |
3418 | pthread_lck_attr = lck_attr_alloc_init(); | |
3419 | ||
f1a1da6c A |
3420 | pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); |
3421 | ||
3422 | pth_global_hashinit(); | |
3423 | psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL); | |
3424 | psynch_zoneinit(); | |
3425 | ||
3426 | /* | |
3427 | * register sysctls | |
3428 | */ | |
3429 | sysctl_register_oid(&sysctl__kern_wq_yielded_threshold); | |
3430 | sysctl_register_oid(&sysctl__kern_wq_yielded_window_usecs); | |
3431 | sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs); | |
3432 | sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs); | |
3433 | sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs); | |
3434 | sysctl_register_oid(&sysctl__kern_wq_max_threads); | |
3435 | sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads); | |
3436 | sysctl_register_oid(&sysctl__kern_pthread_debug_tracing); | |
964d3577 A |
3437 | |
3438 | #if DEBUG | |
3439 | sysctl_register_oid(&sysctl__kern_wq_max_concurrency); | |
3440 | sysctl_register_oid(&sysctl__debug_wq_kevent_test); | |
3441 | #endif | |
3442 | ||
3443 | wq_max_concurrency = pthread_kern->ml_get_max_cpus(); | |
3444 | ||
f1a1da6c | 3445 | } |