1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * pthread_synch.c
31 */
32
33 #define _PTHREAD_CONDATTR_T
34 #define _PTHREAD_COND_T
35 #define _PTHREAD_MUTEXATTR_T
36 #define _PTHREAD_MUTEX_T
37 #define _PTHREAD_RWLOCKATTR_T
38 #define _PTHREAD_RWLOCK_T
39
40 #undef pthread_mutexattr_t
41 #undef pthread_mutex_t
42 #undef pthread_condattr_t
43 #undef pthread_cond_t
44 #undef pthread_rwlockattr_t
45 #undef pthread_rwlock_t
46
47 #include <sys/param.h>
48 #include <sys/queue.h>
49 #include <sys/resourcevar.h>
50 //#include <sys/proc_internal.h>
51 #include <sys/kauth.h>
52 #include <sys/systm.h>
53 #include <sys/timeb.h>
54 #include <sys/times.h>
55 #include <sys/acct.h>
56 #include <sys/kernel.h>
57 #include <sys/wait.h>
58 #include <sys/signalvar.h>
59 #include <sys/sysctl.h>
60 #include <sys/syslog.h>
61 #include <sys/stat.h>
62 #include <sys/lock.h>
63 #include <sys/kdebug.h>
64 //#include <sys/sysproto.h>
65 #include <sys/vm.h>
66 #include <sys/user.h> /* for coredump */
67 #include <sys/proc_info.h> /* for fill_procworkqueue */
68
69
70 #include <mach/mach_port.h>
71 #include <mach/mach_types.h>
72 #include <mach/semaphore.h>
73 #include <mach/sync_policy.h>
74 #include <mach/task.h>
75 #include <mach/vm_prot.h>
76 #include <kern/kern_types.h>
77 #include <kern/task.h>
78 #include <kern/clock.h>
79 #include <mach/kern_return.h>
80 #include <kern/thread.h>
81 #include <kern/sched_prim.h>
82 #include <kern/kalloc.h>
83 #include <kern/sched_prim.h> /* for thread_exception_return */
84 #include <kern/processor.h>
85 #include <kern/assert.h>
86 #include <mach/mach_vm.h>
87 #include <mach/mach_param.h>
88 #include <mach/thread_status.h>
89 #include <mach/thread_policy.h>
90 #include <mach/message.h>
91 #include <mach/port.h>
92 //#include <vm/vm_protos.h>
93 #include <vm/vm_fault.h>
94 #include <vm/vm_map.h>
95 #include <mach/thread_act.h> /* for thread_resume */
96 #include <machine/machine_routines.h>
97
98 #include <libkern/OSAtomic.h>
99
100 #include <sys/pthread_shims.h>
101 #include "kern_internal.h"
102
103 uint32_t pthread_debug_tracing = 0;
104
105 SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
106 &pthread_debug_tracing, 0, "")
107
108 // XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE
109 #define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
110
111 lck_grp_attr_t *pthread_lck_grp_attr;
112 lck_grp_t *pthread_lck_grp;
113 lck_attr_t *pthread_lck_attr;
114
115 extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
116 extern void workqueue_thread_yielded(void);
117
118 static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc,
119 boolean_t overcommit, pthread_priority_t oc_prio);
120
121 static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority);
122
123 static void wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
124 int reuse_thread, int wake_thread, int return_directly);
125
126 static int _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl);
127
128 static void wq_unpark_continue(void);
129 static void wq_unsuspend_continue(void);
130
131 static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
132 static void workqueue_removethread(struct threadlist *tl, int fromexit);
133 static void workqueue_lock_spin(proc_t);
134 static void workqueue_unlock(proc_t);
135
136 int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
137 int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
138
139 #define WQ_MAXPRI_MIN 0 /* low prio queue num */
140 #define WQ_MAXPRI_MAX 2 /* max prio queuenum */
141 #define WQ_PRI_NUM 3 /* number of prio work queues */
142
143 #define C_32_STK_ALIGN 16
144 #define C_64_STK_ALIGN 16
145 #define C_64_REDZONE_LEN 128
146 #define TRUNC_DOWN32(a,c) ((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
147 #define TRUNC_DOWN64(a,c) ((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
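/*
 * TRUNC_DOWN backs an address off by at least 'c' bytes and rounds it
 * down to a 'c'-byte boundary, e.g. TRUNC_DOWN32(0x1005, 16) == 0xff0.
 */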
148
149 /*
150 * Flags field passed to bsdthread_create and back in pthread_start
151 31 <---------------------------------> 0
152 _________________________________________
153 | flags(8) | policy(8) | importance(16) |
154 -----------------------------------------
155 */
156
157 #define PTHREAD_START_CUSTOM 0x01000000
158 #define PTHREAD_START_SETSCHED 0x02000000
159 #define PTHREAD_START_DETACHED 0x04000000
160 #define PTHREAD_START_QOSCLASS 0x08000000
161 #define PTHREAD_START_QOSCLASS_MASK 0xffffff
162 #define PTHREAD_START_POLICY_BITSHIFT 16
163 #define PTHREAD_START_POLICY_MASK 0xff
164 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
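/*
 * For example, _bsdthread_create() below unpacks a PTHREAD_START_SETSCHED
 * request as:
 *     policy     = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
 *     importance = flags & PTHREAD_START_IMPORTANCE_MASK;
 * while a PTHREAD_START_QOSCLASS request carries a pthread_priority_t in
 * (flags & PTHREAD_START_QOSCLASS_MASK).
 */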
165
166 #define SCHED_OTHER POLICY_TIMESHARE
167 #define SCHED_FIFO POLICY_FIFO
168 #define SCHED_RR POLICY_RR
169
170 int
171 _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
172 {
173 kern_return_t kret;
174 void * sright;
175 int error = 0;
176 int allocated = 0;
177 mach_vm_offset_t stackaddr;
178 mach_vm_size_t th_allocsize = 0;
179 mach_vm_size_t user_stacksize;
180 mach_vm_size_t th_stacksize;
181 mach_vm_size_t th_guardsize;
182 mach_vm_offset_t th_stackaddr;
183 mach_vm_offset_t th_stack;
184 mach_vm_offset_t th_pthread;
185 mach_port_name_t th_thport;
186 thread_t th;
187 vm_map_t vmap = pthread_kern->current_map();
188 task_t ctask = current_task();
189 unsigned int policy, importance;
190
191 int isLP64 = 0;
192
193 if (pthread_kern->proc_get_register(p) == 0) {
194 return EINVAL;
195 }
196
197 PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);
198
199 isLP64 = proc_is64bit(p);
200 th_guardsize = vm_map_page_size(vmap);
201
202 #if defined(__i386__) || defined(__x86_64__)
203 stackaddr = 0xB0000000;
204 #else
205 #error Need to define a stack address hint for this architecture
206 #endif
207 kret = pthread_kern->thread_create(ctask, &th);
208 if (kret != KERN_SUCCESS)
209 return(ENOMEM);
210 thread_reference(th);
211
212 sright = (void *)pthread_kern->convert_thread_to_port(th);
213 th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
214
215 if ((flags & PTHREAD_START_CUSTOM) == 0) {
216 th_stacksize = (mach_vm_size_t)user_stack; /* if it is not a custom stack, user_stack holds the requested stacksize */
217 th_allocsize = th_stacksize + th_guardsize + pthread_kern->proc_get_pthsize(p);
218
219 kret = mach_vm_map(vmap, &stackaddr,
220 th_allocsize,
221 page_size-1,
222 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
223 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
224 VM_INHERIT_DEFAULT);
225 if (kret != KERN_SUCCESS)
226 kret = mach_vm_allocate(vmap,
227 &stackaddr, th_allocsize,
228 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
229 if (kret != KERN_SUCCESS) {
230 error = ENOMEM;
231 goto out;
232 }
233
234 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
235
236 th_stackaddr = stackaddr;
237 allocated = 1;
238 /*
239 * The guard page is at the lowest address
240 * The stack base is the highest address
241 */
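/*
 * Resulting layout of the allocation, from low to high addresses:
 *
 *   | guard page (th_guardsize) | thread stack (th_stacksize) | pthread_t + TSD (pthsize) |
 *   ^ stackaddr                                               ^ th_stack == th_pthread
 *
 * The stack grows down from th_stack toward the guard page, and the
 * pthread_t structure sits immediately above the stack.
 */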
242 kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE);
243
244 if (kret != KERN_SUCCESS) {
245 error = ENOMEM;
246 goto out1;
247 }
248 th_stack = (stackaddr + th_stacksize + th_guardsize);
249 th_pthread = (stackaddr + th_stacksize + th_guardsize);
250 user_stacksize = th_stacksize;
251
252 /*
253 * Pre-fault the first page of the new thread's stack and the page that will
254 * contain the pthread_t structure.
255 */
256 vm_fault( vmap,
257 vm_map_trunc_page_mask(th_stack - PAGE_SIZE_64, vm_map_page_mask(vmap)),
258 VM_PROT_READ | VM_PROT_WRITE,
259 FALSE,
260 THREAD_UNINT, NULL, 0);
261
262 vm_fault( vmap,
263 vm_map_trunc_page_mask(th_pthread, vm_map_page_mask(vmap)),
264 VM_PROT_READ | VM_PROT_WRITE,
265 FALSE,
266 THREAD_UNINT, NULL, 0);
267 } else {
268 th_stack = user_stack;
269 user_stacksize = user_stack;
270 th_pthread = user_pthread;
271
272 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
273 }
274
275 #if defined(__i386__) || defined(__x86_64__)
276 /*
277 * Set up i386 registers & function call.
278 */
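/*
 * The entry point is the process's registered thread start routine
 * (proc_get_threadstart), and the registers carry its arguments:
 * the pthread_t, the thread's kernel port, the start function and its
 * argument, the stack size, and the creation flags.
 */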
279 if (isLP64 == 0) {
280 x86_thread_state32_t state;
281 x86_thread_state32_t *ts = &state;
282
283 ts->eip = (unsigned int)pthread_kern->proc_get_threadstart(p);
284 ts->eax = (unsigned int)th_pthread;
285 ts->ebx = (unsigned int)th_thport;
286 ts->ecx = (unsigned int)user_func;
287 ts->edx = (unsigned int)user_funcarg;
288 ts->edi = (unsigned int)user_stacksize;
289 ts->esi = (unsigned int)flags;
290 /*
291 * set stack pointer
292 */
293 ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));
294
295 error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
296 if (error != KERN_SUCCESS) {
297 error = EINVAL;
298 goto out;
299 }
300 } else {
301 x86_thread_state64_t state64;
302 x86_thread_state64_t *ts64 = &state64;
303
304 ts64->rip = (uint64_t)pthread_kern->proc_get_threadstart(p);
305 ts64->rdi = (uint64_t)th_pthread;
306 ts64->rsi = (uint64_t)(th_thport);
307 ts64->rdx = (uint64_t)user_func;
308 ts64->rcx = (uint64_t)user_funcarg;
309 ts64->r8 = (uint64_t)user_stacksize;
310 ts64->r9 = (uint64_t)flags;
311 /*
312 * set stack pointer aligned to 16 byte boundary
313 */
314 ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);
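/* C_64_REDZONE_LEN (128 bytes) matches the red zone the x86-64 ABI reserves below the stack pointer. */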
315
316 error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
317 if (error != KERN_SUCCESS) {
318 error = EINVAL;
319 goto out;
320 }
321
322 }
323 #elif defined(__arm__)
324 arm_thread_state_t state;
325 arm_thread_state_t *ts = &state;
326
327 ts->pc = (int)pthread_kern->proc_get_threadstart(p);
328 ts->r[0] = (unsigned int)th_pthread;
329 ts->r[1] = (unsigned int)th_thport;
330 ts->r[2] = (unsigned int)user_func;
331 ts->r[3] = (unsigned int)user_funcarg;
332 ts->r[4] = (unsigned int)user_stacksize;
333 ts->r[5] = (unsigned int)flags;
334
335 /* Set r7 & lr to 0 for better back tracing */
336 ts->r[7] = 0;
337 ts->lr = 0;
338
339 /*
340 * set stack pointer
341 */
342 ts->sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));
343
344 (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
345
346 #else
347 #error bsdthread_create not defined for this architecture
348 #endif
349
350 if ((flags & PTHREAD_START_SETSCHED) != 0) {
351 /* Set scheduling parameters if needed */
352 thread_extended_policy_data_t extinfo;
353 thread_precedence_policy_data_t precedinfo;
354
355 importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
356 policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
357
358 if (policy == SCHED_OTHER) {
359 extinfo.timeshare = 1;
360 } else {
361 extinfo.timeshare = 0;
362 }
363
364 thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
365
366 #define BASEPRI_DEFAULT 31
367 precedinfo.importance = (importance - BASEPRI_DEFAULT);
368 thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
369 } else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
370 /* Set thread QoS class if requested. */
371 pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);
372
373 thread_qos_policy_data_t qos;
374 qos.qos_tier = pthread_priority_get_qos_class(priority);
375 qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
376 _pthread_priority_get_relpri(priority);
377
378 pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
379 }
380
381 kret = pthread_kern->thread_resume(th);
382 if (kret != KERN_SUCCESS) {
383 error = EINVAL;
384 goto out1;
385 }
386 thread_deallocate(th); /* drop the creator reference */
387
388 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);
389
390 *retval = th_pthread;
391
392 return(0);
393
394 out1:
395 if (allocated != 0) {
396 (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
397 }
398 out:
399 (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
400 (void)thread_terminate(th);
401 (void)thread_deallocate(th);
402 return(error);
403 }
404
405 int
406 _bsdthread_terminate(__unused struct proc *p,
407 user_addr_t stackaddr,
408 size_t size,
409 uint32_t kthport,
410 uint32_t sem,
411 __unused int32_t *retval)
412 {
413 mach_vm_offset_t freeaddr;
414 mach_vm_size_t freesize;
415 kern_return_t kret;
416
417 freeaddr = (mach_vm_offset_t)stackaddr;
418 freesize = size;
419
420 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);
421
422 if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
423 kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
424 if (kret != KERN_SUCCESS) {
425 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
426 return(EINVAL);
427 }
428 }
429
430 (void) thread_terminate(current_thread());
431 if (sem != MACH_PORT_NULL) {
432 kret = pthread_kern->semaphore_signal_internal_trap(sem);
433 if (kret != KERN_SUCCESS) {
434 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
435 return(EINVAL);
436 }
437 }
438
439 if (kthport != MACH_PORT_NULL) {
440 pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
441 }
442
443 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);
444
445 pthread_kern->thread_exception_return();
446 panic("bsdthread_terminate: still running\n");
447
448 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
449
450 return(0);
451 }
452
453 int
454 _bsdthread_register(struct proc *p,
455 user_addr_t threadstart,
456 user_addr_t wqthread,
457 int pthsize,
458 user_addr_t pthread_init_data,
459 user_addr_t targetconc_ptr,
460 uint64_t dispatchqueue_offset,
461 int32_t *retval)
462 {
463 /* prevent multiple registrations */
464 if (pthread_kern->proc_get_register(p) != 0) {
465 return(EINVAL);
466 }
467 /* syscall randomizer test can pass bogus values */
468 if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
469 return(EINVAL);
470 }
471 pthread_kern->proc_set_threadstart(p, threadstart);
472 pthread_kern->proc_set_wqthread(p, wqthread);
473 pthread_kern->proc_set_pthsize(p, pthsize);
474 pthread_kern->proc_set_register(p);
475
476 /* if we have pthread_init_data, then we use it; targetconc_ptr is reused here to carry the size of the data to copy in. */
477 if (pthread_init_data != 0) {
478 thread_qos_policy_data_t qos;
479
480 struct _pthread_registration_data data;
481 size_t pthread_init_sz = MIN(sizeof(struct _pthread_registration_data), (size_t)targetconc_ptr);
482
483 kern_return_t kr = copyin(pthread_init_data, &data, pthread_init_sz);
484 if (kr != KERN_SUCCESS) {
485 return EINVAL;
486 }
487
488 /* Incoming data from the data structure */
489 pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);
490
491 /* Outgoing data that userspace expects as a reply */
492 if (pthread_kern->qos_main_thread_active()) {
493 mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
494 boolean_t gd = FALSE;
495
496 kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
497 if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
498 /* An unspecified QoS means the kernel wants us to impose the legacy QoS class on the thread. */
499 qos.qos_tier = THREAD_QOS_LEGACY;
500 qos.tier_importance = 0;
501
502 kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
503 }
504
505 if (kr == KERN_SUCCESS) {
506 data.main_qos = pthread_qos_class_get_priority(qos.qos_tier);
507 } else {
508 data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
509 }
510 } else {
511 data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
512 }
513
514 kr = copyout(&data, pthread_init_data, pthread_init_sz);
515 if (kr != KERN_SUCCESS) {
516 return EINVAL;
517 }
518 } else {
519 pthread_kern->proc_set_dispatchqueue_offset(p, dispatchqueue_offset);
520 pthread_kern->proc_set_targconc(p, targetconc_ptr);
521 }
522
523 /* return the supported feature set as the return value. */
524 *retval = PTHREAD_FEATURE_SUPPORTED;
525
526 return(0);
527 }
528
529 int
530 _bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
531 {
532 kern_return_t kr;
533 thread_t th;
534
535 pthread_priority_t priority;
536
537 /* Unused parameters must be zero. */
538 if (arg3 != 0) {
539 return EINVAL;
540 }
541
542 /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
543 if (proc_is64bit(p)) {
544 uint64_t v;
545 kr = copyin(tsd_priority_addr, &v, sizeof(v));
546 if (kr != KERN_SUCCESS) {
547 return kr;
548 }
549 priority = (int)(v & 0xffffffff);
550 } else {
551 uint32_t v;
552 kr = copyin(tsd_priority_addr, &v, sizeof(v));
553 if (kr != KERN_SUCCESS) {
554 return kr;
555 }
556 priority = v;
557 }
558
559 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
560 return ESRCH;
561 }
562
563 /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
564 if (th != current_thread()) {
565 thread_deallocate(th);
566 return EPERM;
567 }
568
569 int rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);
570
571 /* Static-param the thread: we just set QoS on it, so it's stuck in QoS land now. */
572 /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744> for details
573
574 thread_deallocate(th);
575
576 return rv;
577 }
578
579 static inline struct threadlist *
580 util_get_thread_threadlist_entry(thread_t th)
581 {
582 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
583 if (uth) {
584 struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
585 return tl;
586 }
587 return NULL;
588 }
589
590 static inline void
591 wq_thread_override_reset(thread_t th, user_addr_t resource)
592 {
593 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
594 struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
595
596 if (tl) {
597 /*
598 * Drop all outstanding overrides on this thread, done outside the wq lock
599 * because proc_usynch_thread_qos_remove_override_for_resource takes a spinlock that
600 * could cause us to panic.
601 */
602 PTHREAD_TRACE(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0);
603
604 pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
605 }
606 }
607
608 int
609 _bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
610 {
611 thread_qos_policy_data_t qos;
612 mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
613 boolean_t gd = FALSE;
614
615 kern_return_t kr;
616 int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;
617
618 if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
619 kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
620 if (kr != KERN_SUCCESS) {
621 qos_rv = EINVAL;
622 goto voucher;
623 }
624
625 /* If we have main-thread QoS then we don't allow a thread to come out of QOS_CLASS_UNSPECIFIED. */
626 if (pthread_kern->qos_main_thread_active() && qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
627 qos_rv = EPERM;
628 goto voucher;
629 }
630
631 /* Get the work queue for tracing, and the threadlist for bucket manipulation. */
632 struct workqueue *wq = NULL;
633 struct threadlist *tl = util_get_thread_threadlist_entry(current_thread());
634 if (tl) {
635 wq = tl->th_workq;
636 }
637
638 PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);
639
640 qos.qos_tier = pthread_priority_get_qos_class(priority);
641 qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);
642
643 kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
644 if (kr != KERN_SUCCESS) {
645 qos_rv = EINVAL;
646 goto voucher;
647 }
648
649 /* If we're a workqueue, the threadlist item priority needs adjusting, along with the bucket we were running in. */
650 if (tl) {
651 workqueue_lock_spin(p);
652
653 /* Fix up counters. */
654 uint8_t old_bucket = tl->th_priority;
655 uint8_t new_bucket = pthread_priority_get_class_index(priority);
656
657 uint32_t old_active = OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
658 OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);
659
660 wq->wq_thscheduled_count[old_bucket]--;
661 wq->wq_thscheduled_count[new_bucket]++;
662
663 tl->th_priority = new_bucket;
664
665 /* If we were at the ceiling of non-overcommitted threads for a given bucket, we have to
666 * reevaluate whether we should start more work.
667 */
668 if (old_active == wq->wq_reqconc[old_bucket]) {
669 /* workqueue_run_nextreq will drop the workqueue lock in all exit paths. */
670 (void)workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);
671 } else {
672 workqueue_unlock(p);
673 }
674 }
675
676 PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
677 }
678
679 voucher:
680 if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
681 kr = pthread_kern->thread_set_voucher_name(voucher);
682 if (kr != KERN_SUCCESS) {
683 voucher_rv = ENOENT;
684 goto fixedpri;
685 }
686 }
687
688 fixedpri:
689 if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
690 thread_extended_policy_data_t extpol;
691 thread_t thread = current_thread();
692
693 extpol.timeshare = 0;
694
695 struct threadlist *tl = util_get_thread_threadlist_entry(thread);
696 if (tl) {
697 /* Not allowed on workqueue threads, since there is no symmetric clear function */
698 fixedpri_rv = ENOTSUP;
699 goto done;
700 }
701
702 kr = pthread_kern->thread_policy_set_internal(thread, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
703 if (kr != KERN_SUCCESS) {
704 fixedpri_rv = EINVAL;
705 goto done;
706 }
707 }
708
709 done:
710 if (qos_rv && voucher_rv) {
711 /* Both failed, give that a unique error. */
712 return EBADMSG;
713 }
714
715 if (qos_rv) {
716 return qos_rv;
717 }
718
719 if (voucher_rv) {
720 return voucher_rv;
721 }
722
723 if (fixedpri_rv) {
724 return fixedpri_rv;
725 }
726
727 return 0;
728 }
729
730 int
731 _bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
732 {
733 thread_t th;
734 int rv = 0;
735
736 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
737 return ESRCH;
738 }
739
740 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
741 int override_qos = pthread_priority_get_qos_class(priority);
742
743 struct threadlist *tl = util_get_thread_threadlist_entry(th);
744 if (tl) {
745 PTHREAD_TRACE(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
746 }
747
748 /* The only failure case here would be passing a tid and having it look up the thread; since we pass the uthread directly, this always succeeds. */
749 pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), uth, 0, override_qos, TRUE, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);
750
751 thread_deallocate(th);
752 return rv;
753 }
754
755 int
756 _bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval)
757 {
758 thread_t th;
759 int rv = 0;
760
761 if (arg3 != 0) {
762 return EINVAL;
763 }
764
765 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
766 return ESRCH;
767 }
768
769 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
770
771 struct threadlist *tl = util_get_thread_threadlist_entry(th);
772 if (tl) {
773 PTHREAD_TRACE(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0);
774 }
775
776 pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);
777
778 thread_deallocate(th);
779 return rv;
780 }
781
782 int
783 _bsdthread_ctl_qos_override_dispatch(struct proc *p, user_addr_t cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int *retval)
784 {
785 if (arg3 != 0) {
786 return EINVAL;
787 }
788
789 return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, kport, priority, USER_ADDR_NULL, retval);
790 }
791
792 int
793 _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
794 {
795 thread_t th;
796 int rv = 0;
797
798 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
799 return ESRCH;
800 }
801
802 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
803 int override_qos = pthread_priority_get_qos_class(priority);
804
805 struct threadlist *tl = util_get_thread_threadlist_entry(th);
806 if (!tl) {
807 thread_deallocate(th);
808 return EPERM;
809 }
810
811 PTHREAD_TRACE(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
812
813 /* The only failure case here would be passing a tid and having it look up the thread; since we pass the uthread directly, this always succeeds. */
814 pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), uth, 0, override_qos, TRUE, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
815
816 thread_deallocate(th);
817 return rv;
818 }
819
820 int
821 _bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
822 {
823 if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
824 return EINVAL;
825 }
826
827 return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval);
828 }
829
830 int
831 _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval)
832 {
833 thread_t th;
834 struct threadlist *tl;
835 int rv = 0;
836
837 if ((reset_all && (resource != 0)) || arg3 != 0) {
838 return EINVAL;
839 }
840
841 th = current_thread();
842 tl = util_get_thread_threadlist_entry(th);
843
844 if (tl) {
845 wq_thread_override_reset(th, reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource);
846 } else {
847 rv = EPERM;
848 }
849
850 return rv;
851 }
852
853 int
854 _bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
855 {
856 switch (cmd) {
857 case BSDTHREAD_CTL_SET_QOS:
858 return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
859 case BSDTHREAD_CTL_QOS_OVERRIDE_START:
860 return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
861 case BSDTHREAD_CTL_QOS_OVERRIDE_END:
862 return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
863 case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
864 return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
865 case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
866 return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
867 case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
868 return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
869 case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
870 return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
871 case BSDTHREAD_CTL_SET_SELF:
872 return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
873 default:
874 return EINVAL;
875 }
876 }
877
878 uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD;
879 uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS;
880 uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS;
881 uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS;
882 uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS;
883 uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS;
884 uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8;
885
886
887 SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
888 &wq_yielded_threshold, 0, "");
889
890 SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
891 &wq_yielded_window_usecs, 0, "");
892
893 SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
894 &wq_stalled_window_usecs, 0, "");
895
896 SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
897 &wq_reduce_pool_window_usecs, 0, "");
898
899 SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
900 &wq_max_timer_interval_usecs, 0, "");
901
902 SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
903 &wq_max_threads, 0, "");
904
905 SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
906 &wq_max_constrained_threads, 0, "");
907
908
909 static uint32_t wq_init_constrained_limit = 1;
910
911
912 void
913 _workqueue_init_lock(proc_t p)
914 {
915 lck_spin_init(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp, pthread_lck_attr);
916 *(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;
917 }
918
919 void
920 _workqueue_destroy_lock(proc_t p)
921 {
922 lck_spin_destroy(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp);
923 }
924
925
926 static void
927 workqueue_lock_spin(proc_t p)
928 {
929 lck_spin_lock(pthread_kern->proc_get_wqlockptr(p));
930 }
931
932 static void
933 workqueue_unlock(proc_t p)
934 {
935 lck_spin_unlock(pthread_kern->proc_get_wqlockptr(p));
936 }
937
938
939 static void
940 workqueue_interval_timer_start(struct workqueue *wq)
941 {
942 uint64_t deadline;
943
944 if (wq->wq_timer_interval == 0) {
945 wq->wq_timer_interval = wq_stalled_window_usecs;
946
947 } else {
948 wq->wq_timer_interval = wq->wq_timer_interval * 2;
949
950 if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
951 wq->wq_timer_interval = wq_max_timer_interval_usecs;
952 }
953 }
954 clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);
955
956 thread_call_enter_delayed(wq->wq_atimer_call, deadline);
957
958 PTHREAD_TRACE(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
959 }
960
961
962 static boolean_t
963 wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
964 {
965 clock_sec_t secs;
966 clock_usec_t usecs;
967 uint64_t lastblocked_ts;
968 uint64_t elapsed;
969
970 /*
971 * the timestamp is updated atomically w/o holding the workqueue lock
972 * so we need to do an atomic read of the 64 bits so that we don't see
973 * a mismatched pair of 32-bit reads... we accomplish this in an architecture-
974 * independent fashion by using OSCompareAndSwap64 to write back the
975 * value we grabbed... if it succeeds, then we have a good timestamp to
976 * evaluate... if it fails, we straddled grabbing the timestamp while it
977 * was being updated... treat a failed update as a busy thread since
978 * it implies we are about to see a really fresh timestamp anyway
979 */
980 lastblocked_ts = *lastblocked_tsp;
981
982 if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
983 return (TRUE);
984
985 if (lastblocked_ts >= cur_ts) {
986 /*
987 * because the update of the timestamp when a thread blocks isn't
988 * serialized against us looking at it (i.e. we don't hold the workq lock)
989 * it's possible to have a timestamp that matches the current time or
990 * that even looks to be in the future relative to when we grabbed the current
991 * time... just treat this as a busy thread since it must have just blocked.
992 */
993 return (TRUE);
994 }
995 elapsed = cur_ts - lastblocked_ts;
996
997 pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
998
999 if (secs == 0 && usecs < wq_stalled_window_usecs)
1000 return (TRUE);
1001 return (FALSE);
1002 }
1003
1004
1005 #define WQ_TIMER_NEEDED(wq, start_timer) do { \
1006 int oldflags = wq->wq_flags; \
1007 \
1008 if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) { \
1009 if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
1010 start_timer = TRUE; \
1011 } \
1012 } while (0)
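/*
 * WQ_TIMER_NEEDED sets start_timer to TRUE only when it wins the race to
 * set WQ_ATIMER_RUNNING (and the workqueue isn't exiting); the caller is
 * then responsible for calling workqueue_interval_timer_start() once it
 * is safe to do so.
 */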
1013
1014
1015
1016 static void
1017 workqueue_add_timer(struct workqueue *wq, __unused int param1)
1018 {
1019 proc_t p;
1020 boolean_t start_timer = FALSE;
1021 boolean_t retval;
1022 boolean_t add_thread;
1023 uint32_t busycount;
1024
1025 PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);
1026
1027 p = wq->wq_proc;
1028
1029 workqueue_lock_spin(p);
1030
1031 /*
1032 * because workqueue_callback now runs w/o taking the workqueue lock
1033 * we are unsynchronized w/r/t a change in state of the running threads...
1034 * to make sure we always evaluate that change, we allow it to start up
1035 * a new timer if the current one is actively evaluating the state
1036 * however, we do not need more than 2 timers fired up (1 active and 1 pending)
1037 * and we certainly do not want 2 active timers evaluating the state
1038 * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
1039 * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
1040 * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
1041 * and set atomically since the callback function needs to manipulate it
1042 * w/o holding the workq lock...
1043 *
1044 * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY == no pending timer, no active timer
1045 * !WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY == no pending timer, 1 active timer
1046 * WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY == 1 pending timer, no active timer
1047 * WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY == 1 pending timer, 1 active timer
1048 */
1049 while (wq->wq_lflags & WQL_ATIMER_BUSY) {
1050 wq->wq_lflags |= WQL_ATIMER_WAITING;
1051
1052 assert_wait((caddr_t)wq, (THREAD_UNINT));
1053 workqueue_unlock(p);
1054
1055 thread_block(THREAD_CONTINUE_NULL);
1056
1057 workqueue_lock_spin(p);
1058 }
1059 wq->wq_lflags |= WQL_ATIMER_BUSY;
1060
1061 /*
1062 * the workq lock will protect us from seeing WQ_EXITING change state, but we
1063 * still need to update this atomically in case someone else tries to start
1064 * the timer just as we're releasing it
1065 */
1066 while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));
1067
1068 again:
1069 retval = TRUE;
1070 add_thread = FALSE;
1071
1072 if ( !(wq->wq_flags & WQ_EXITING)) {
1073 /*
1074 * check to see if the stall frequency was beyond our tolerance
1075 * or we have work on the queue, but haven't scheduled any
1076 * new work within our acceptable time interval because
1077 * there were no idle threads left to schedule
1078 */
1079 if (wq->wq_reqcount) {
1080 uint32_t priclass;
1081 uint32_t thactive_count;
1082 uint32_t i;
1083 uint64_t curtime;
1084
1085 for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
1086 if (wq->wq_requests[priclass])
1087 break;
1088 }
1089 assert(priclass < WORKQUEUE_NUM_BUCKETS);
1090
1091 curtime = mach_absolute_time();
1092 busycount = 0;
1093 thactive_count = 0;
1094
1095 /*
1096 * check for conditions under which we would not add a thread, either
1097 * a) we've got as many running threads as we want in this priority
1098 * band and the priority bands above it
1099 *
1100 * b) check to see if the priority group has blocked threads, if the
1101 * last blocked timestamp is old enough, we will have already passed
1102 * (a) where we would have stopped if we had enough active threads.
1103 */
1104 for (i = 0; i <= priclass; i++) {
1105
1106 thactive_count += wq->wq_thactive_count[i];
1107
1108 if (wq->wq_thscheduled_count[i]) {
1109 if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i]))
1110 busycount++;
1111 }
1112 }
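/*
 * a 'busy' bucket (one whose last-blocked timestamp is still fresh) is
 * counted as though its thread were still active, so a thread that just
 * blocked doesn't immediately cause an extra thread to be added
 */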
1113 if (thactive_count + busycount < wq->wq_max_concurrency) {
1114
1115 if (wq->wq_thidlecount == 0) {
1116 /*
1117 * if we have no idle threads, try to add one
1118 */
1119 retval = workqueue_addnewthread(wq, FALSE);
1120 }
1121 add_thread = TRUE;
1122 }
1123
1124 if (wq->wq_reqcount) {
1125 /*
1126 * as long as we have threads to schedule, and we successfully
1127 * scheduled new work, keep trying
1128 */
1129 while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
1130 /*
1131 * workqueue_run_nextreq is responsible for
1132 * dropping the workqueue lock in all cases
1133 */
1134 retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);
1135 workqueue_lock_spin(p);
1136
1137 if (retval == FALSE)
1138 break;
1139 }
1140 if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {
1141
1142 if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
1143 goto again;
1144
1145 if (wq->wq_thidlecount == 0 || busycount)
1146 WQ_TIMER_NEEDED(wq, start_timer);
1147
1148 PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);
1149 }
1150 }
1151 }
1152 }
1153 if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
1154 wq->wq_timer_interval = 0;
1155
1156 wq->wq_lflags &= ~WQL_ATIMER_BUSY;
1157
1158 if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
1159 /*
1160 * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer
1161 * to finish getting out of the way
1162 */
1163 wq->wq_lflags &= ~WQL_ATIMER_WAITING;
1164 wakeup(wq);
1165 }
1166
1167 PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);
1168
1169 workqueue_unlock(p);
1170
1171 if (start_timer == TRUE)
1172 workqueue_interval_timer_start(wq);
1173 }
1174
1175
1176 void
1177 _workqueue_thread_yielded(void)
1178 {
1179 struct workqueue *wq;
1180 proc_t p;
1181
1182 p = current_proc();
1183
1184 if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL || wq->wq_reqcount == 0)
1185 return;
1186
1187 workqueue_lock_spin(p);
1188
1189 if (wq->wq_reqcount) {
1190 uint64_t curtime;
1191 uint64_t elapsed;
1192 clock_sec_t secs;
1193 clock_usec_t usecs;
1194
1195 if (wq->wq_thread_yielded_count++ == 0)
1196 wq->wq_thread_yielded_timestamp = mach_absolute_time();
1197
1198 if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
1199 workqueue_unlock(p);
1200 return;
1201 }
1202
1203 PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);
1204
1205 wq->wq_thread_yielded_count = 0;
1206
1207 curtime = mach_absolute_time();
1208 elapsed = curtime - wq->wq_thread_yielded_timestamp;
1209 pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
1210
1211 if (secs == 0 && usecs < wq_yielded_window_usecs) {
1212
1213 if (wq->wq_thidlecount == 0) {
1214 workqueue_addnewthread(wq, TRUE);
1215 /*
1216 * 'workqueue_addnewthread' drops the workqueue lock
1217 * when creating the new thread and then retakes it before
1218 * returning... this window allows other threads to process
1219 * requests, so we need to recheck for available work...
1220 * if none is found, we just return... the newly created thread
1221 * will eventually get used (if it hasn't already)...
1222 */
1223 if (wq->wq_reqcount == 0) {
1224 workqueue_unlock(p);
1225 return;
1226 }
1227 }
1228 if (wq->wq_thidlecount) {
1229 uint32_t priority;
1230 boolean_t overcommit = FALSE;
1231 boolean_t force_oc = FALSE;
1232
1233 for (priority = 0; priority < WORKQUEUE_NUM_BUCKETS; priority++) {
1234 if (wq->wq_requests[priority]) {
1235 break;
1236 }
1237 }
1238 assert(priority < WORKQUEUE_NUM_BUCKETS);
1239
1240 wq->wq_reqcount--;
1241 wq->wq_requests[priority]--;
1242
1243 if (wq->wq_ocrequests[priority]) {
1244 wq->wq_ocrequests[priority]--;
1245 overcommit = TRUE;
1246 } else
1247 force_oc = TRUE;
1248
1249 (void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, pthread_priority_from_class_index(priority));
1250 /*
1251 * workqueue_run_nextreq is responsible for
1252 * dropping the workqueue lock in all cases
1253 */
1254 PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);
1255
1256 return;
1257 }
1258 }
1259 PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);
1260 }
1261 workqueue_unlock(p);
1262 }
1263
1264
1265
1266 static void
1267 workqueue_callback(int type, thread_t thread)
1268 {
1269 struct uthread *uth;
1270 struct threadlist *tl;
1271 struct workqueue *wq;
1272
1273 uth = pthread_kern->get_bsdthread_info(thread);
1274 tl = pthread_kern->uthread_get_threadlist(uth);
1275 wq = tl->th_workq;
1276
1277 switch (type) {
1278 case SCHED_CALL_BLOCK: {
1279 uint32_t old_activecount;
1280 boolean_t start_timer = FALSE;
1281
1282 old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
1283
1284 if (old_activecount == wq->wq_reqconc[tl->th_priority]) {
1285 uint64_t curtime;
1286 UInt64 *lastblocked_ptr;
1287
1288 /*
1289 * the number of active threads at this priority
1290 * has fallen below the maximum number of concurrent
1291 * threads that we're allowed to run
1292 */
1293 lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority];
1294 curtime = mach_absolute_time();
1295
1296 /*
1297 * if we collide with another thread trying to update the last_blocked (really unlikely
1298 * since another thread would have to get scheduled and then block after we start down
1299 * this path), it's not a problem. Either timestamp is adequate, so no need to retry
1300 */
1301
1302 OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
1303
1304 if (wq->wq_reqcount) {
1305 /*
1306 * we have work to do so start up the timer
1307 * if it's not running... we'll let it sort
1308 * out whether we really need to start up
1309 * another thread
1310 */
1311 WQ_TIMER_NEEDED(wq, start_timer);
1312 }
1313
1314 if (start_timer == TRUE) {
1315 workqueue_interval_timer_start(wq);
1316 }
1317 }
1318 PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_START, wq, old_activecount, tl->th_priority, start_timer, thread_tid(thread));
1319 break;
1320 }
1321 case SCHED_CALL_UNBLOCK:
1322 /*
1323 * we cannot take the workqueue_lock here...
1324 * an UNBLOCK can occur from a timer event which
1325 * is run from an interrupt context... if the workqueue_lock
1326 * is already held by this processor, we'll deadlock...
1327 * the thread lock for the thread being UNBLOCKED
1328 * is also held
1329 */
1330 OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority]);
1331
1332 PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, 0, thread_tid(thread));
1333
1334 break;
1335 }
1336 }
1337
1338 sched_call_t
1339 _workqueue_get_sched_callback(void)
1340 {
1341 return workqueue_callback;
1342 }
1343
1344 static void
1345 workqueue_removethread(struct threadlist *tl, int fromexit)
1346 {
1347 struct workqueue *wq;
1348 struct uthread * uth;
1349
1350 /*
1351 * If fromexit is set, the call is from workqueue_exit(),
1352 * so some cleanups are to be avoided.
1353 */
1354 wq = tl->th_workq;
1355
1356 TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
1357
1358 if (fromexit == 0) {
1359 wq->wq_nthreads--;
1360 wq->wq_thidlecount--;
1361 }
1362
1363 /*
1364 * Clear the threadlist pointer in the uthread so that a
1365 * blocked thread, on wakeup for termination, will
1366 * not access the threadlist as it is about to be
1367 * freed.
1368 */
1369 pthread_kern->thread_sched_call(tl->th_thread, NULL);
1370
1371 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1372 if (uth != (struct uthread *)0) {
1373 pthread_kern->uthread_set_threadlist(uth, NULL);
1374 }
1375 if (fromexit == 0) {
1376 /* during exit the lock is not held */
1377 workqueue_unlock(wq->wq_proc);
1378 }
1379
1380 if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
1381 /*
1382 * thread was created, but never used...
1383 * need to clean up the stack and port ourselves
1384 * since we're not going to spin up through the
1385 * normal exit path triggered from Libc
1386 */
1387 if (fromexit == 0) {
1388 /* vm map is already deallocated when this is called from exit */
1389 (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
1390 }
1391 (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
1392
1393 PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
1394 } else {
1395
1396 PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
1397 }
1398 /*
1399 * drop our ref on the thread
1400 */
1401 thread_deallocate(tl->th_thread);
1402
1403 kfree(tl, sizeof(struct threadlist));
1404 }
1405
1406
1407 /*
1408 * called with workq lock held
1409 * dropped and retaken around thread creation
1410 * return with workq lock held
1411 */
1412 static boolean_t
1413 workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)
1414 {
1415 struct threadlist *tl;
1416 struct uthread *uth;
1417 kern_return_t kret;
1418 thread_t th;
1419 proc_t p;
1420 void *sright;
1421 mach_vm_offset_t stackaddr;
1422 mach_vm_size_t guardsize;
1423
1424 if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING)
1425 return (FALSE);
1426
1427 if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (pthread_kern->config_thread_max - 20)) {
1428 wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
1429 return (FALSE);
1430 }
1431 wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
1432
1433 if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
1434 /*
1435 * if we're not creating this thread to service an overcommit request,
1436 * then check the size of the constrained thread pool... if we've already
1437 * reached our max for threads scheduled from this pool, don't create a new
1438 * one... the callers of this function are prepared for failure.
1439 */
1440 wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
1441 return (FALSE);
1442 }
1443 if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
1444 wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
1445
1446 wq->wq_nthreads++;
1447
1448 p = wq->wq_proc;
1449 workqueue_unlock(p);
1450
1451 kret = pthread_kern->thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);
1452 if (kret != KERN_SUCCESS) {
1453 goto failed;
1454 }
1455
1456 tl = kalloc(sizeof(struct threadlist));
1457 bzero(tl, sizeof(struct threadlist));
1458
1459 #if defined(__i386__) || defined(__x86_64__)
1460 stackaddr = 0xB0000000;
1461 #else
1462 #error Need to define a stack address hint for this architecture
1463 #endif
1464
1465 guardsize = vm_map_page_size(wq->wq_map);
1466 tl->th_allocsize = PTH_DEFAULT_STACKSIZE + guardsize + pthread_kern->proc_get_pthsize(p);
1467
1468 kret = mach_vm_map(wq->wq_map, &stackaddr,
1469 tl->th_allocsize,
1470 page_size-1,
1471 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
1472 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
1473 VM_INHERIT_DEFAULT);
1474
1475 if (kret != KERN_SUCCESS) {
1476 kret = mach_vm_allocate(wq->wq_map,
1477 &stackaddr, tl->th_allocsize,
1478 VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
1479 }
1480 if (kret == KERN_SUCCESS) {
1481 /*
1482 * The guard page is at the lowest address
1483 * The stack base is the highest address
1484 */
1485 kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
1486
1487 if (kret != KERN_SUCCESS)
1488 (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
1489 }
1490 if (kret != KERN_SUCCESS) {
1491 (void) thread_terminate(th);
1492 thread_deallocate(th);
1493
1494 kfree(tl, sizeof(struct threadlist));
1495 goto failed;
1496 }
1497 thread_reference(th);
1498
1499 sright = (void *)pthread_kern->convert_thread_to_port(th);
1500 tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(wq->wq_task));
1501
1502 pthread_kern->thread_static_param(th, TRUE);
1503
1504 tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;
1505
1506 tl->th_thread = th;
1507 tl->th_workq = wq;
1508 tl->th_stackaddr = stackaddr;
1509 tl->th_priority = WORKQUEUE_NUM_BUCKETS;
1510 tl->th_policy = -1;
1511
1512 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1513
1514 workqueue_lock_spin(p);
1515
1516 pthread_kern->uthread_set_threadlist(uth, tl);
1517 TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
1518
1519 wq->wq_thidlecount++;
1520
1521 PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread));
1522
1523 return (TRUE);
1524
1525 failed:
1526 workqueue_lock_spin(p);
1527 wq->wq_nthreads--;
1528
1529 return (FALSE);
1530 }
1531
1532
1533 int
1534 _workq_open(struct proc *p, __unused int32_t *retval)
1535 {
1536 struct workqueue * wq;
1537 int wq_size;
1538 char * ptr;
1539 uint32_t i;
1540 uint32_t num_cpus;
1541 int error = 0;
1542 boolean_t need_wakeup = FALSE;
1543
1544 if (pthread_kern->proc_get_register(p) == 0) {
1545 return EINVAL;
1546 }
1547
1548 num_cpus = pthread_kern->ml_get_max_cpus();
1549
1550 if (wq_init_constrained_limit) {
1551 uint32_t limit;
1552 /*
1553 * set up the limit for the constrained pool
1554 * this is a virtual pool in that we don't
1555 * maintain it on a separate idle and run list
1556 */
1557 limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
1558
1559 if (limit > wq_max_constrained_threads)
1560 wq_max_constrained_threads = limit;
1561
1562 wq_init_constrained_limit = 0;
1563 }
1564 workqueue_lock_spin(p);
1565
1566 if (pthread_kern->proc_get_wqptr(p) == NULL) {
1567
1568 while (*pthread_kern->proc_get_wqinitingptr(p) == TRUE) {
1569
1570 assert_wait((caddr_t)pthread_kern->proc_get_wqinitingptr(p), THREAD_UNINT);
1571 workqueue_unlock(p);
1572
1573 thread_block(THREAD_CONTINUE_NULL);
1574
1575 workqueue_lock_spin(p);
1576 }
1577 if (pthread_kern->proc_get_wqptr(p) != NULL) {
1578 goto out;
1579 }
1580
1581 *(pthread_kern->proc_get_wqinitingptr(p)) = TRUE;
1582
1583 workqueue_unlock(p);
1584
1585 wq_size = sizeof(struct workqueue);
1586
1587 ptr = (char *)kalloc(wq_size);
1588 bzero(ptr, wq_size);
1589
1590 wq = (struct workqueue *)ptr;
1591 wq->wq_flags = WQ_LIST_INITED;
1592 wq->wq_proc = p;
1593 wq->wq_max_concurrency = num_cpus;
1594 wq->wq_task = current_task();
1595 wq->wq_map = pthread_kern->current_map();
1596
1597 for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++)
1598 wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency;
1599
1600 TAILQ_INIT(&wq->wq_thrunlist);
1601 TAILQ_INIT(&wq->wq_thidlelist);
1602
1603 wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);
1604
1605 workqueue_lock_spin(p);
1606
1607 pthread_kern->proc_set_wqptr(p, wq);
1608 pthread_kern->proc_set_wqsize(p, wq_size);
1609
1610 *(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;
1611 need_wakeup = TRUE;
1612 }
1613 out:
1614 workqueue_unlock(p);
1615
1616 if (need_wakeup == TRUE) {
1617 wakeup(pthread_kern->proc_get_wqinitingptr(p));
1618 }
1619 return(error);
1620 }
1621
1622
1623 int
1624 _workq_kernreturn(struct proc *p,
1625 int options,
1626 __unused user_addr_t item,
1627 int arg2,
1628 int arg3,
1629 __unused int32_t *retval)
1630 {
1631 struct workqueue *wq;
1632 int error = 0;
1633
1634 if (pthread_kern->proc_get_register(p) == 0) {
1635 return EINVAL;
1636 }
1637
1638 switch (options) {
1639 case WQOPS_QUEUE_NEWSPISUPP: {
1640 /*
1641 * arg2 = offset of serialno into dispatch queue
1642 */
1643 int offset = arg2;
1644
1645 pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
1646 break;
1647 }
1648 case WQOPS_QUEUE_REQTHREADS: {
1649 /*
1650 * arg2 = number of threads to start
1651 * arg3 = priority
1652 */
1653 boolean_t overcommit = FALSE;
1654 int reqcount = arg2;
1655 pthread_priority_t priority = arg3;
1656 int class;
1657
1658 overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0;
1659 class = pthread_priority_get_class_index(priority);
1660
1661 if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS)) {
1662 error = EINVAL;
1663 break;
1664 }
1665
1666 workqueue_lock_spin(p);
1667
1668 if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
1669 workqueue_unlock(p);
1670
1671 error = EINVAL;
1672 break;
1673 }
1674
1675 if (!overcommit) {
1676 wq->wq_reqcount += reqcount;
1677 wq->wq_requests[class] += reqcount;
1678
1679 PTHREAD_TRACE(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1680
1681 while (wq->wq_reqcount) {
1682 if (!workqueue_run_one(p, wq, overcommit, priority))
1683 break;
1684 }
1685 } else {
1686 PTHREAD_TRACE(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1687
1688 while (reqcount) {
1689 if (!workqueue_run_one(p, wq, overcommit, priority))
1690 break;
1691 reqcount--;
1692 }
1693 if (reqcount) {
1694 /*
1695 * we need to delay starting some of the overcommit requests...
1696 * we should only fail to create the overcommit threads if
1697 * we're at the max thread limit... as existing threads
1698 * return to the kernel, we'll notice the ocrequests
1699 * and spin them back to user space as the overcommit variety
1700 */
1701 wq->wq_reqcount += reqcount;
1702 wq->wq_requests[class] += reqcount;
1703 wq->wq_ocrequests[class] += reqcount;
1704
1705 PTHREAD_TRACE(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1706 }
1707 }
1708 workqueue_unlock(p);
1709 break;
1710 }
1711
1712 case WQOPS_THREAD_RETURN: {
1713 thread_t th = current_thread();
1714 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
1715 struct threadlist *tl = util_get_thread_threadlist_entry(th);
1716
1717 /* reset signal mask on the workqueue thread to default state */
1718 if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
1719 pthread_kern->proc_lock(p);
1720 pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
1721 pthread_kern->proc_unlock(p);
1722 }
1723
1724 /* dropping WQ override counts has to be done outside the wq lock. */
1725 wq_thread_override_reset(th, THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD);
1726
1727 workqueue_lock_spin(p);
1728
1729 if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL || !tl) {
1730 workqueue_unlock(p);
1731
1732 error = EINVAL;
1733 break;
1734 }
1735 PTHREAD_TRACE(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0);
1736
1737
1738 (void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0);
1739 /*
1740 * workqueue_run_nextreq is responsible for
1741 * dropping the workqueue lock in all cases
1742 */
1743 break;
1744 }
1745
1746 default:
1747 error = EINVAL;
1748 break;
1749 }
1750 return (error);
1751 }
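
/*
 * Illustrative user-space call pattern for this entry point (a sketch;
 * the __workq_kernreturn stub name is assumed from the libpthread
 * user-space side and is not defined in this file):
 *
 *	// ask the kernel to start n constrained threads at prio
 *	__workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, n, prio);
 *
 *	// a workqueue thread hands itself back to the kernel for reuse
 *	__workq_kernreturn(WQOPS_THREAD_RETURN, NULL, 0, 0);
 */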
1752
1753 /*
1754 * Routine: workqueue_mark_exiting
1755 *
1756 * Function: Mark the work queue such that new threads will not be added to the
1757 * work queue after we return.
1758 *
1759 * Conditions: Called against the current process.
1760 */
1761 void
1762 _workqueue_mark_exiting(struct proc *p)
1763 {
1764 struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
1765
1766 if (wq != NULL) {
1767
1768 PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
1769
1770 workqueue_lock_spin(p);
1771
1772 /*
1773 * we now arm the timer in the callback function w/o holding the workq lock...
1774 * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
1775 * ensure that only a single timer is running and to notice that WQ_EXITING has
1776 * been set (we don't want to start a timer once WQ_EXITING is posted)
1777 *
1778 * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
1779 * therefore there is no need to clear the timer state atomically from the flags
1780 *
1781 * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING
1782 * the check for it and the sleep until it clears are protected
1783 */
1784 while (!(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));
1785
1786 if (wq->wq_flags & WQ_ATIMER_RUNNING) {
1787 if (thread_call_cancel(wq->wq_atimer_call) == TRUE) {
1788 wq->wq_flags &= ~WQ_ATIMER_RUNNING;
1789 }
1790 }
1791 while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {
1792 assert_wait((caddr_t)wq, (THREAD_UNINT));
1793 workqueue_unlock(p);
1794
1795 thread_block(THREAD_CONTINUE_NULL);
1796
1797 workqueue_lock_spin(p);
1798 }
1799 workqueue_unlock(p);
1800
1801 PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
1802 }
1803 }
1804
1805 /*
1806 * Routine: workqueue_exit
1807 *
1808 * Function: Clean up the work queue structure(s) now that there are no threads
1809 * left running inside the work queue (except possibly current_thread).
1810 *
1811 * Conditions: Called by the last thread in the process.
1812 * Called against current process.
1813 */
1814 void
1815 _workqueue_exit(struct proc *p)
1816 {
1817 struct workqueue * wq;
1818 struct threadlist * tl, *tlist;
1819 struct uthread *uth;
1820 int wq_size = 0;
1821
1822 wq = pthread_kern->proc_get_wqptr(p);
1823 if (wq != NULL) {
1824
1825 PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
1826
1827 wq_size = pthread_kern->proc_get_wqsize(p);
1828 pthread_kern->proc_set_wqptr(p, NULL);
1829 pthread_kern->proc_set_wqsize(p, 0);
1830
1831 /*
1832 * Clean up workqueue data structures for threads that exited and
1833 * didn't get a chance to clean up after themselves.
1834 */
1835 TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
1836 pthread_kern->thread_sched_call(tl->th_thread, NULL);
1837
1838 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1839 if (uth != (struct uthread *)0) {
1840 pthread_kern->uthread_set_threadlist(uth, NULL);
1841 }
1842 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1843
1844 /*
1845 * drop our last ref on the thread
1846 */
1847 thread_deallocate(tl->th_thread);
1848
1849 kfree(tl, sizeof(struct threadlist));
1850 }
1851 TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
1852 workqueue_removethread(tl, 1);
1853 }
1854 thread_call_free(wq->wq_atimer_call);
1855
1856 kfree(wq, wq_size);
1857
1858 PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
1859 }
1860 }
1861
1862
1863 static boolean_t
1864 workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority)
1865 {
1866 boolean_t ran_one;
1867
1868 if (wq->wq_thidlecount == 0) {
1869 if (overcommit == FALSE) {
1870 if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency)
1871 workqueue_addnewthread(wq, overcommit);
1872 } else {
1873 workqueue_addnewthread(wq, overcommit);
1874
1875 if (wq->wq_thidlecount == 0)
1876 return (FALSE);
1877 }
1878 }
1879 ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority);
1880 /*
1881 * workqueue_run_nextreq is responsible for
1882 * dropping the workqueue lock in all cases
1883 */
1884 workqueue_lock_spin(p);
1885
1886 return (ran_one);
1887 }
1888
1889
1890
1891 /*
1892 * workqueue_run_nextreq:
1893 * called with the workqueue lock held...
1894 * responsible for dropping it in all cases
1895 */
1896 static boolean_t
1897 workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread,
1898 boolean_t force_oc, boolean_t overcommit, pthread_priority_t oc_prio)
1899 {
1900 thread_t th_to_run = THREAD_NULL;
1901 thread_t th_to_park = THREAD_NULL;
1902 int wake_thread = 0;
1903 int reuse_thread = WQ_FLAG_THREAD_REUSE;
1904 uint32_t priclass, orig_class;
1905 uint32_t us_to_wait;
1906 struct threadlist *tl = NULL;
1907 struct uthread *uth = NULL;
1908 boolean_t start_timer = FALSE;
1909 boolean_t adjust_counters = TRUE;
1910 uint64_t curtime;
1911 uint32_t thactive_count;
1912 uint32_t busycount;
1913
1914 PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_reqcount, 0);
1915
1916 if (thread != THREAD_NULL) {
1917 uth = pthread_kern->get_bsdthread_info(thread);
1918
1919 if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) {
1920 panic("wq thread with no threadlist");
1921 }
1922 }
1923
1924 /*
1925 * from here until we drop the workq lock
1926 * we can't be pre-empted since we hold
1927 * the lock in spin mode... this is important
1928 * since we have to independently update the priority that
1929 * the thread is associated with and the priority-based
1930 * counters that "workqueue_callback" also changes and bases
1931 * decisions on.
1932 */
1933 dispatch_overcommit:
1934
1935 if (overcommit || force_oc) {
1936 priclass = pthread_priority_get_class_index(oc_prio);
1937
1938 if (thread != THREAD_NULL) {
1939 th_to_run = thread;
1940 goto pick_up_work;
1941 }
1942 goto grab_idle_thread;
1943 }
1944 if (wq->wq_reqcount) {
1945 for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
1946 if (wq->wq_requests[priclass])
1947 break;
1948 }
1949 assert(priclass < WORKQUEUE_NUM_BUCKETS);
1950
1951 if (wq->wq_ocrequests[priclass] && (thread != THREAD_NULL || wq->wq_thidlecount)) {
1952 /*
1953 * handle delayed overcommit request...
1954 * they have priority over normal requests
1955 * within a given priority level
1956 */
1957 wq->wq_reqcount--;
1958 wq->wq_requests[priclass]--;
1959 wq->wq_ocrequests[priclass]--;
1960
1961 oc_prio = pthread_priority_from_class_index(priclass);
1962 overcommit = TRUE;
1963
1964 goto dispatch_overcommit;
1965 }
1966 }
1967 /*
1968 * if we get here, the work should be handled by a constrained thread
1969 */
1970 if (wq->wq_reqcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
1971 /*
1972 * no work to do, or we're already at or over the scheduling limit for
1973 * constrained threads... just return or park the thread...
1974 * do not start the timer for this condition... if we don't have any work,
1975 * we'll check again when new work arrives... if we're over the limit, we need 1 or more
1976 * constrained threads to return to the kernel before we can dispatch additional work
1977 */
1978 if ((th_to_park = thread) == THREAD_NULL)
1979 goto out_of_work;
1980 goto parkit;
1981 }
1982
1983 thactive_count = 0;
1984 busycount = 0;
1985
1986 curtime = mach_absolute_time();
1987
1988 thactive_count += wq->wq_thactive_count[priclass];
1989
1990 if (wq->wq_thscheduled_count[priclass]) {
1991 if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[priclass])) {
1992 busycount++;
1993 }
1994 }
1995
1996 if (thread != THREAD_NULL) {
1997 if (tl->th_priority == priclass) {
1998 /*
1999 * don't count this thread as currently active
2000 */
2001 thactive_count--;
2002 }
2003 }
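/*
 * constrained work may only be dispatched while the number of active
 * threads in this priority class, plus any recently-blocked "busy"
 * threads, stays below wq_max_concurrency... otherwise the request
 * stays queued until a thread blocks or the delayed timer fires
 */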
2004 if (thactive_count + busycount >= wq->wq_max_concurrency) {
2005 if (busycount) {
2006 /*
2007 * we found at least 1 thread in the
2008 * 'busy' state... make sure we start
2009 * the timer because if they are the only
2010 * threads keeping us from scheduling
2011 * this work request, we won't get a callback
2012 * to kick off the timer... we need to
2013 * start it now...
2014 */
2015 WQ_TIMER_NEEDED(wq, start_timer);
2016 }
2017
2018 PTHREAD_TRACE(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, (start_timer ? 1<<7 : 0) | pthread_priority_from_class_index(priclass), thactive_count, busycount, 0);
2019
2020 if ((th_to_park = thread) == THREAD_NULL) {
2021 goto out_of_work;
2022 }
2023
2024 goto parkit;
2025 }
2026
2027 if (thread != THREAD_NULL) {
2028 /*
2029 * thread is non-NULL here when we return from userspace
2030 * in workq_kernreturn; rather than trying to find a thread,
2031 * we pick up new work for this specific thread.
2032 */
2033 th_to_run = thread;
2034 goto pick_up_work;
2035 }
2036
2037 grab_idle_thread:
2038 if (wq->wq_thidlecount == 0) {
2039 /*
2040 * we have no additional threads waiting to pick up
2041 * work, however, there is additional work to do.
2042 */
2043 WQ_TIMER_NEEDED(wq, start_timer);
2044
2045 PTHREAD_TRACE(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0);
2046
2047 goto no_thread_to_run;
2048 }
2049
2050 /*
2051 * we already know there is both work available
2052 * and an idle thread, so activate a thread and then
2053 * fall into the code that pulls a new work request...
2054 */
2055 tl = TAILQ_FIRST(&wq->wq_thidlelist);
2056 TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
2057 wq->wq_thidlecount--;
2058
2059 TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
2060
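/*
 * an idle thread is in one of two states... either still suspended
 * from its creation (first dispatch, so it gets thread_resume'd later
 * in wq_runreq and is not marked as reused), or parked in
 * wq_unpark_continue (so it gets woken with wakeup() instead)
 */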
2061 if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
2062 tl->th_flags &= ~TH_LIST_SUSPENDED;
2063 reuse_thread = 0;
2064
2065 } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
2066 tl->th_flags &= ~TH_LIST_BLOCKED;
2067 wake_thread = 1;
2068 }
2069 tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
2070
2071 wq->wq_threads_scheduled++;
2072 wq->wq_thscheduled_count[priclass]++;
2073 OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
2074
2075 adjust_counters = FALSE;
2076 th_to_run = tl->th_thread;
2077
2078 pick_up_work:
2079 if (!overcommit && !force_oc) {
2080 wq->wq_reqcount--;
2081 wq->wq_requests[priclass]--;
2082
2083 if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
2084 wq->wq_constrained_threads_scheduled++;
2085 tl->th_flags |= TH_LIST_CONSTRAINED;
2086 }
2087 } else {
2088 if (tl->th_flags & TH_LIST_CONSTRAINED) {
2089 wq->wq_constrained_threads_scheduled--;
2090 tl->th_flags &= ~TH_LIST_CONSTRAINED;
2091 }
2092 }
2093
2094 orig_class = tl->th_priority;
2095 tl->th_priority = (uint8_t)priclass;
2096
2097 if (adjust_counters && (orig_class != priclass)) {
2098 /*
2099 * we need to adjust these counters based on this
2100 * thread's new disposition with respect to priority
2101 */
2102 OSAddAtomic(-1, &wq->wq_thactive_count[orig_class]);
2103 OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
2104
2105 wq->wq_thscheduled_count[orig_class]--;
2106 wq->wq_thscheduled_count[priclass]++;
2107 }
2108 wq->wq_thread_yielded_count = 0;
2109
2110 workqueue_unlock(p);
2111
2112 if (orig_class != priclass) {
2113 pthread_priority_t pri = pthread_priority_from_class_index(priclass);
2114
2115 thread_qos_policy_data_t qosinfo;
2116
2117 /* Set the QoS tier on the thread, along with the ceiling of max importance for this class. */
2118 qosinfo.qos_tier = pthread_priority_get_qos_class(pri);
2119 qosinfo.tier_importance = 0;
2120
2121 PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(orig_class), 0, 0);
2122
2123 /* All the previous implementation here now boils down to setting the QoS policy on the thread. */
2124 pthread_kern->thread_policy_set_internal(th_to_run, THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, THREAD_QOS_POLICY_COUNT);
2125
2126 PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(priclass), qosinfo.qos_tier, 0);
2127 }
2128
2129 /*
2130 * if the current thread is being reused for this work request, it does not return via unix_syscall
2131 */
2132 wq_runreq(p, overcommit, pthread_priority_from_class_index(priclass), th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
2133
2134 PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0);
2135
2136 return (TRUE);
2137
2138 out_of_work:
2139 /*
2140 * we have no work to do or we are fully booked
2141 * with respect to running threads...
2142 */
2143 no_thread_to_run:
2144 workqueue_unlock(p);
2145
2146 if (start_timer)
2147 workqueue_interval_timer_start(wq);
2148
2149 PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(thread), start_timer, 2, 0);
2150
2151 return (FALSE);
2152
2153 parkit:
2154 /*
2155 * this is a workqueue thread with no more
2156 * work to do... park it for now
2157 */
2158 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
2159 tl->th_flags &= ~TH_LIST_RUNNING;
2160
2161 tl->th_flags |= TH_LIST_BLOCKED;
2162 TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
2163
2164 pthread_kern->thread_sched_call(th_to_park, NULL);
2165
2166 OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
2167 wq->wq_thscheduled_count[tl->th_priority]--;
2168 wq->wq_threads_scheduled--;
2169
2170 if (tl->th_flags & TH_LIST_CONSTRAINED) {
2171 wq->wq_constrained_threads_scheduled--;
2172 wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
2173 tl->th_flags &= ~TH_LIST_CONSTRAINED;
2174 }
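/*
 * scale how long this parked thread lingers by the size of the idle
 * pool... with few idle threads we wait close to the full
 * wq_reduce_pool_window_usecs before letting the thread exit, while a
 * pool of 100 or more idle threads waits only 1/100th of the window
 */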
2175 if (wq->wq_thidlecount < 100)
2176 us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
2177 else
2178 us_to_wait = wq_reduce_pool_window_usecs / 100;
2179
2180 wq->wq_thidlecount++;
2181 wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
2182
2183 assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
2184 TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
2185 wq_reduce_pool_window_usecs, NSEC_PER_USEC);
2186
2187 workqueue_unlock(p);
2188
2189 if (start_timer)
2190 workqueue_interval_timer_start(wq);
2191
2192 PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park));
2193 PTHREAD_TRACE(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0);
2194
2195 thread_block((thread_continue_t)wq_unpark_continue);
2196 /* NOT REACHED */
2197
2198 return (FALSE);
2199 }
2200
2201
2202 static void
2203 wq_unsuspend_continue(void)
2204 {
2205 struct uthread *uth = NULL;
2206 thread_t th_to_unsuspend;
2207 struct threadlist *tl;
2208 proc_t p;
2209
2210 th_to_unsuspend = current_thread();
2211 uth = pthread_kern->get_bsdthread_info(th_to_unsuspend);
2212
2213 if (uth != NULL && (tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {
2214
2215 if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
2216 /*
2217 * most likely a normal resume of this thread occurred...
2218 * it's also possible that the thread was aborted after we
2219 * finished setting it up so that it could be dispatched... if
2220 * so, thread_bootstrap_return will notice the abort and put
2221 * the thread on the path to self-destruction
2222 */
2223 normal_resume_to_user:
2224 pthread_kern->thread_sched_call(th_to_unsuspend, workqueue_callback);
2225 pthread_kern->thread_bootstrap_return();
2226 }
2227 /*
2228 * if we get here, it's because we've been resumed due to
2229 * an abort of this thread (process is crashing)
2230 */
2231 p = current_proc();
2232
2233 workqueue_lock_spin(p);
2234
2235 if (tl->th_flags & TH_LIST_SUSPENDED) {
2236 /*
2237 * thread has been aborted while still on our idle
2238 * queue... remove it from our domain...
2239 * workqueue_removethread consumes the lock
2240 */
2241 workqueue_removethread(tl, 0);
2242 pthread_kern->thread_bootstrap_return();
2243 }
2244 while ((tl->th_flags & TH_LIST_BUSY)) {
2245 /*
2246 * this thread was aborted after we started making
2247 * it runnable, but before we finished dispatching it...
2248 * we need to wait for that process to finish,
2249 * and we need to ask for a wakeup instead of a
2250 * thread_resume since the abort has already resumed us
2251 */
2252 tl->th_flags |= TH_LIST_NEED_WAKEUP;
2253
2254 assert_wait((caddr_t)tl, (THREAD_UNINT));
2255
2256 workqueue_unlock(p);
2257 thread_block(THREAD_CONTINUE_NULL);
2258 workqueue_lock_spin(p);
2259 }
2260 workqueue_unlock(p);
2261 /*
2262 * we have finished setting up the thread's context...
2263 * thread_bootstrap_return will take us through the abort path
2264 * where the thread will self-destruct
2265 */
2266 goto normal_resume_to_user;
2267 }
2268 pthread_kern->thread_bootstrap_return();
2269 }
2270
2271
2272 static void
2273 wq_unpark_continue(void)
2274 {
2275 struct uthread *uth = NULL;
2276 struct threadlist *tl;
2277 thread_t th_to_unpark;
2278 proc_t p;
2279
2280 th_to_unpark = current_thread();
2281 uth = pthread_kern->get_bsdthread_info(th_to_unpark);
2282
2283 if (uth != NULL) {
2284 if ((tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {
2285
2286 if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
2287 /*
2288 * a normal wakeup of this thread occurred... no need
2289 * for any synchronization with the timer and wq_runreq
2290 */
2291 normal_return_to_user:
2292 pthread_kern->thread_sched_call(th_to_unpark, workqueue_callback);
2293
2294 PTHREAD_TRACE(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0);
2295
2296 pthread_kern->thread_exception_return();
2297 }
2298 p = current_proc();
2299
2300 workqueue_lock_spin(p);
2301
2302 if ( !(tl->th_flags & TH_LIST_RUNNING)) {
2303 /*
2304 * the timer popped us out and we've not
2305 * been moved off of the idle list
2306 * so we should now self-destruct
2307 *
2308 * workqueue_removethread consumes the lock
2309 */
2310 workqueue_removethread(tl, 0);
2311 pthread_kern->thread_exception_return();
2312 }
2313 /*
2314 * the timer woke us up, but we have already
2315 * started to make this a runnable thread and
2316 * have not yet finished that process...
2317 * so wait for the normal wakeup
2318 */
2319 while ((tl->th_flags & TH_LIST_BUSY)) {
2320
2321 assert_wait((caddr_t)tl, (THREAD_UNINT));
2322
2323 workqueue_unlock(p);
2324
2325 thread_block(THREAD_CONTINUE_NULL);
2326
2327 workqueue_lock_spin(p);
2328 }
2329 /*
2330 * we have finished setting up the thread's context
2331 * now we can return as if we got a normal wakeup
2332 */
2333 workqueue_unlock(p);
2334
2335 goto normal_return_to_user;
2336 }
2337 }
2338 pthread_kern->thread_exception_return();
2339 }
2340
2341
2342
2343 static void
2344 wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
2345 int reuse_thread, int wake_thread, int return_directly)
2346 {
2347 int ret = 0;
2348 boolean_t need_resume = FALSE;
2349
2350 PTHREAD_TRACE1(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, overcommit, priority, thread_tid(current_thread()), thread_tid(th));
2351
2352 ret = _setup_wqthread(p, th, overcommit, priority, reuse_thread, tl);
2353
2354 if (ret != 0)
2355 panic("setup_wqthread failed %x\n", ret);
2356
2357 if (return_directly) {
2358 PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);
2359
2360 pthread_kern->thread_exception_return();
2361 panic("wq_runreq: thread_exception_return returned ...\n");
2362 }
2363 if (wake_thread) {
2364 workqueue_lock_spin(p);
2365
2366 tl->th_flags &= ~TH_LIST_BUSY;
2367 wakeup(tl);
2368
2369 workqueue_unlock(p);
2370 } else {
2371 PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
2372
2373 workqueue_lock_spin(p);
2374
2375 if (tl->th_flags & TH_LIST_NEED_WAKEUP) {
2376 wakeup(tl);
2377 } else {
2378 need_resume = TRUE;
2379 }
2380
2381 tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);
2382
2383 workqueue_unlock(p);
2384
2385 if (need_resume) {
2386 /*
2387 * need to do this outside of the workqueue spin lock
2388 * since thread_resume locks the thread via a full mutex
2389 */
2390 pthread_kern->thread_resume(th);
2391 }
2392 }
2393 }
2394
2395
2396 int
2397 _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl)
2398 {
2399 uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI;
2400 mach_vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
2401 int error = 0;
2402
2403 if (overcommit) {
2404 flags |= WQ_FLAG_THREAD_OVERCOMMIT;
2405 }
2406
2407 /* Put the QoS class value into the lower bits of the reuse_thread register; this is where
2408 * the thread priority used to be stored anyway.
2409 */
2410 flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);
2411
2412 #if defined(__i386__) || defined(__x86_64__)
2413 int isLP64 = proc_is64bit(p);
2414
2415 /*
2416 * Set up i386 registers & function call.
2417 */
2418 if (isLP64 == 0) {
2419 x86_thread_state32_t state;
2420 x86_thread_state32_t *ts = &state;
2421
2422 ts->eip = (unsigned int)pthread_kern->proc_get_wqthread(p);
2423 ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
2424 ts->ebx = (unsigned int)tl->th_thport;
2425 ts->ecx = (unsigned int)(tl->th_stackaddr + guardsize);
2426 ts->edx = (unsigned int)0;
2427 ts->edi = (unsigned int)flags;
2428 ts->esi = (unsigned int)0;
2429 /*
2430 * set stack pointer
2431 */
2432 ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_32_STK_ALIGN));
2433
2434 (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
2435
2436 } else {
2437 x86_thread_state64_t state64;
2438 x86_thread_state64_t *ts64 = &state64;
2439
2440 ts64->rip = (uint64_t)pthread_kern->proc_get_wqthread(p);
2441 ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
2442 ts64->rsi = (uint64_t)(tl->th_thport);
2443 ts64->rdx = (uint64_t)(tl->th_stackaddr + guardsize);
2444 ts64->rcx = (uint64_t)0;
2445 ts64->r8 = (uint64_t)flags;
2446 ts64->r9 = (uint64_t)0;
2447
2448 /*
2449 * set stack pointer aligned to 16 byte boundary
2450 */
2451 ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_64_REDZONE_LEN);
2452
2453 error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
2454 if (error != KERN_SUCCESS) {
2455 error = EINVAL;
2456 }
2457 }
2458 #else
2459 #error setup_wqthread not defined for this architecture
2460 #endif
2461
2462 return error;
2463 }
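
/*
 * For reference, the x86_64 SysV argument registers are rdi, rsi, rdx,
 * rcx, r8, r9, so the state set up above starts the user-space wqthread
 * entry point roughly as (argument names assumed, not taken from this
 * file):
 *
 *	wqthread(stack_top, kport, stackaddr + guardsize, 0, flags, 0);
 *
 * with rsp set C_64_REDZONE_LEN below the top of that same stack.
 */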
2464
2465 int
2466 _fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
2467 {
2468 struct workqueue * wq;
2469 int error = 0;
2470 int activecount;
2471 uint32_t pri;
2472
2473 workqueue_lock_spin(p);
2474 if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
2475 error = EINVAL;
2476 goto out;
2477 }
2478 activecount = 0;
2479
2480 for (pri = 0; pri < WORKQUEUE_NUM_BUCKETS; pri++) {
2481 activecount += wq->wq_thactive_count[pri];
2482 }
2483 pwqinfo->pwq_nthreads = wq->wq_nthreads;
2484 pwqinfo->pwq_runthreads = activecount;
2485 pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
2486 pwqinfo->pwq_state = 0;
2487
2488 if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT) {
2489 pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
2490 }
2491
2492 if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT) {
2493 pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
2494 }
2495
2496 out:
2497 workqueue_unlock(p);
2498 return(error);
2499 }
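
/*
 * A sketch of the user-space consumer (assumed; proc_pidinfo() and the
 * PROC_PIDWORKQUEUEINFO flavor come from libproc / sys/proc_info.h, not
 * from this file):
 *
 *	struct proc_workqueueinfo wi;
 *	proc_pidinfo(pid, PROC_PIDWORKQUEUEINFO, 0, &wi, sizeof(wi));
 *
 * which reports the thread counts and limit flags filled in above.
 */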
2500
2501 int
2502 _thread_selfid(__unused struct proc *p, uint64_t *retval)
2503 {
2504 thread_t thread = current_thread();
2505 *retval = thread_tid(thread);
2506 return KERN_SUCCESS;
2507 }
2508
2509 void
2510 _pthread_init(void)
2511 {
2512 pthread_lck_grp_attr = lck_grp_attr_alloc_init();
2513 pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
2514
2515 /*
2516 * allocate the lock attribute for pthread synchronizers
2517 */
2518 pthread_lck_attr = lck_attr_alloc_init();
2519
2520 _workqueue_init_lock((proc_t)get_bsdtask_info(kernel_task));
2521 pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
2522
2523 pth_global_hashinit();
2524 psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
2525 psynch_zoneinit();
2526
2527 /*
2528 * register sysctls
2529 */
2530 sysctl_register_oid(&sysctl__kern_wq_yielded_threshold);
2531 sysctl_register_oid(&sysctl__kern_wq_yielded_window_usecs);
2532 sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
2533 sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
2534 sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
2535 sysctl_register_oid(&sysctl__kern_wq_max_threads);
2536 sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
2537 sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
2538 }