1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * pthread_synch.c
31 */
32
33 #define _PTHREAD_CONDATTR_T
34 #define _PTHREAD_COND_T
35 #define _PTHREAD_MUTEXATTR_T
36 #define _PTHREAD_MUTEX_T
37 #define _PTHREAD_RWLOCKATTR_T
38 #define _PTHREAD_RWLOCK_T
39
40 #undef pthread_mutexattr_t
41 #undef pthread_mutex_t
42 #undef pthread_condattr_t
43 #undef pthread_cond_t
44 #undef pthread_rwlockattr_t
45 #undef pthread_rwlock_t
46
47 #include <sys/param.h>
48 #include <sys/queue.h>
49 #include <sys/resourcevar.h>
50 //#include <sys/proc_internal.h>
51 #include <sys/kauth.h>
52 #include <sys/systm.h>
53 #include <sys/timeb.h>
54 #include <sys/times.h>
55 #include <sys/acct.h>
56 #include <sys/kernel.h>
57 #include <sys/wait.h>
58 #include <sys/signalvar.h>
59 #include <sys/sysctl.h>
60 #include <sys/syslog.h>
61 #include <sys/stat.h>
62 #include <sys/lock.h>
63 #include <sys/kdebug.h>
64 //#include <sys/sysproto.h>
65 #include <sys/vm.h>
66 #include <sys/user.h> /* for coredump */
67 #include <sys/proc_info.h> /* for fill_procworkqueue */
68
69
70 #include <mach/mach_port.h>
71 #include <mach/mach_types.h>
72 #include <mach/semaphore.h>
73 #include <mach/sync_policy.h>
74 #include <mach/task.h>
75 #include <mach/vm_prot.h>
76 #include <kern/kern_types.h>
77 #include <kern/task.h>
78 #include <kern/clock.h>
79 #include <mach/kern_return.h>
80 #include <kern/thread.h>
81 #include <kern/sched_prim.h>
82 #include <kern/kalloc.h>
83 #include <kern/sched_prim.h> /* for thread_exception_return */
84 #include <kern/processor.h>
85 #include <kern/assert.h>
86 #include <mach/mach_vm.h>
87 #include <mach/mach_param.h>
88 #include <mach/thread_status.h>
89 #include <mach/thread_policy.h>
90 #include <mach/message.h>
91 #include <mach/port.h>
92 //#include <vm/vm_protos.h>
93 #include <vm/vm_fault.h>
94 #include <vm/vm_map.h>
95 #include <mach/thread_act.h> /* for thread_resume */
96 #include <machine/machine_routines.h>
97
98 #include <libkern/OSAtomic.h>
99
100 #include <sys/pthread_shims.h>
101 #include "kern_internal.h"
102
103 uint32_t pthread_debug_tracing = 0;
104
105 SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
106 &pthread_debug_tracing, 0, "");
107
108 // XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE
109 #define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
110
111 lck_grp_attr_t *pthread_lck_grp_attr;
112 lck_grp_t *pthread_lck_grp;
113 lck_attr_t *pthread_lck_attr;
114
115 extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
116 extern void workqueue_thread_yielded(void);
117
118 static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc,
119 boolean_t overcommit, pthread_priority_t oc_prio);
120
121 static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority);
122
123 static void wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
124 int reuse_thread, int wake_thread, int return_directly);
125
126 static int _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl);
127
128 static void wq_unpark_continue(void);
129 static void wq_unsuspend_continue(void);
130
131 static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
132 static void workqueue_removethread(struct threadlist *tl, int fromexit);
133 static void workqueue_lock_spin(proc_t);
134 static void workqueue_unlock(proc_t);
135
136 int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
137 int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
138
139 #define WQ_MAXPRI_MIN 0 /* low prio queue num */
140 #define WQ_MAXPRI_MAX 2 /* max prio queuenum */
141 #define WQ_PRI_NUM 3 /* number of prio work queues */
142
143 #define C_32_STK_ALIGN 16
144 #define C_64_STK_ALIGN 16
145 #define C_64_REDZONE_LEN 128
146 #define TRUNC_DOWN32(a,c) ((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
147 #define TRUNC_DOWN64(a,c) ((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
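/*
 * TRUNC_DOWN reserves c bytes below address a and rounds the result down to
 * a c-byte boundary (c must be a power of two), e.g.
 * TRUNC_DOWN32(0x1007, 16) == (0x1007 - 0x10) & 0xfffffff0 == 0x0ff0.
 */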
148
149 /*
150 * Flags field passed to bsdthread_create and back in pthread_start
151 31 <---------------------------------> 0
152 _________________________________________
153 | flags(8) | policy(8) | importance(16) |
154 -----------------------------------------
155 */
156
157 #define PTHREAD_START_CUSTOM 0x01000000
158 #define PTHREAD_START_SETSCHED 0x02000000
159 #define PTHREAD_START_DETACHED 0x04000000
160 #define PTHREAD_START_QOSCLASS 0x08000000
161 #define PTHREAD_START_QOSCLASS_MASK 0xffffff
162 #define PTHREAD_START_POLICY_BITSHIFT 16
163 #define PTHREAD_START_POLICY_MASK 0xff
164 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
165
166 #define SCHED_OTHER POLICY_TIMESHARE
167 #define SCHED_FIFO POLICY_FIFO
168 #define SCHED_RR POLICY_RR
169
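/*
 * Illustrative sketch (not part of the original file): how a caller could
 * pack the 32-bit flags word shown in the diagram above before invoking
 * bsdthread_create with PTHREAD_START_SETSCHED.  The helper name is
 * hypothetical; the matching decode lives in _bsdthread_create() below.
 */
#if 0
static uint32_t
example_pack_setsched_flags(uint32_t policy, uint32_t importance)
{
	return PTHREAD_START_SETSCHED |
	    ((policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT) |
	    (importance & PTHREAD_START_IMPORTANCE_MASK);
}
#endif
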
170 int
171 _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
172 {
173 kern_return_t kret;
174 void * sright;
175 int error = 0;
176 int allocated = 0;
177 mach_vm_offset_t stackaddr;
178 mach_vm_size_t th_allocsize = 0;
179 mach_vm_size_t user_stacksize;
180 mach_vm_size_t th_stacksize;
181 mach_vm_size_t th_guardsize;
182 mach_vm_offset_t th_stackaddr;
183 mach_vm_offset_t th_stack;
184 mach_vm_offset_t th_pthread;
185 mach_port_name_t th_thport;
186 thread_t th;
187 vm_map_t vmap = pthread_kern->current_map();
188 task_t ctask = current_task();
189 unsigned int policy, importance;
190
191 int isLP64 = 0;
192
193 if (pthread_kern->proc_get_register(p) == 0) {
194 return EINVAL;
195 }
196
197 PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);
198
199 isLP64 = proc_is64bit(p);
200 th_guardsize = vm_map_page_size(vmap);
201
202 #if defined(__i386__) || defined(__x86_64__)
203 stackaddr = 0xB0000000;
204 #else
205 #error Need to define a stack address hint for this architecture
206 #endif
207 kret = pthread_kern->thread_create(ctask, &th);
208 if (kret != KERN_SUCCESS)
209 return(ENOMEM);
210 thread_reference(th);
211
212 sright = (void *)pthread_kern->convert_thread_to_port(th);
213 th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
214
215 if ((flags & PTHREAD_START_CUSTOM) == 0) {
216 th_stacksize = (mach_vm_size_t)user_stack; /* when PTHREAD_START_CUSTOM is not set, user_stack carries the requested stack size */
217 th_allocsize = th_stacksize + th_guardsize + pthread_kern->proc_get_pthsize(p);
218
219 kret = mach_vm_map(vmap, &stackaddr,
220 th_allocsize,
221 page_size-1,
222 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
223 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
224 VM_INHERIT_DEFAULT);
225 if (kret != KERN_SUCCESS)
226 kret = mach_vm_allocate(vmap,
227 &stackaddr, th_allocsize,
228 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
229 if (kret != KERN_SUCCESS) {
230 error = ENOMEM;
231 goto out;
232 }
233
234 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
235
236 th_stackaddr = stackaddr;
237 allocated = 1;
238 /*
239 * The guard page is at the lowest address
240 * The stack base is the highest address
241 */
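/*
 * Resulting layout of the th_allocsize mapping (low address -> high):
 *
 *   stackaddr                                  guard page(s), th_guardsize bytes, VM_PROT_NONE
 *   stackaddr + th_guardsize                   lowest usable stack address
 *   stackaddr + th_guardsize + th_stacksize    stack base == th_stack == th_pthread
 *   th_pthread .. th_pthread + pthsize         storage for the pthread_t / TSD
 */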
242 kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE);
243
244 if (kret != KERN_SUCCESS) {
245 error = ENOMEM;
246 goto out1;
247 }
248 th_stack = (stackaddr + th_stacksize + th_guardsize);
249 th_pthread = (stackaddr + th_stacksize + th_guardsize);
250 user_stacksize = th_stacksize;
251
252 /*
253 * Pre-fault the first page of the new thread's stack and the page that will
254 * contain the pthread_t structure.
255 */
256 vm_fault( vmap,
257 vm_map_trunc_page_mask(th_stack - PAGE_SIZE_64, vm_map_page_mask(vmap)),
258 VM_PROT_READ | VM_PROT_WRITE,
259 FALSE,
260 THREAD_UNINT, NULL, 0);
261
262 vm_fault( vmap,
263 vm_map_trunc_page_mask(th_pthread, vm_map_page_mask(vmap)),
264 VM_PROT_READ | VM_PROT_WRITE,
265 FALSE,
266 THREAD_UNINT, NULL, 0);
267 } else {
268 th_stack = user_stack;
269 user_stacksize = user_stack;
270 th_pthread = user_pthread;
271
272 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
273 }
274
275 #if defined(__i386__) || defined(__x86_64__)
276 /*
277 * Set up i386 registers & function call.
278 */
279 if (isLP64 == 0) {
280 x86_thread_state32_t state;
281 x86_thread_state32_t *ts = &state;
282
283 ts->eip = (unsigned int)pthread_kern->proc_get_threadstart(p);
284 ts->eax = (unsigned int)th_pthread;
285 ts->ebx = (unsigned int)th_thport;
286 ts->ecx = (unsigned int)user_func;
287 ts->edx = (unsigned int)user_funcarg;
288 ts->edi = (unsigned int)user_stacksize;
289 ts->esi = (unsigned int)flags;
290 /*
291 * set stack pointer
292 */
293 ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));
294
295 error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
296 if (error != KERN_SUCCESS) {
297 error = EINVAL;
298 goto out;
299 }
300 } else {
301 x86_thread_state64_t state64;
302 x86_thread_state64_t *ts64 = &state64;
303
304 ts64->rip = (uint64_t)pthread_kern->proc_get_threadstart(p);
305 ts64->rdi = (uint64_t)th_pthread;
306 ts64->rsi = (uint64_t)(th_thport);
307 ts64->rdx = (uint64_t)user_func;
308 ts64->rcx = (uint64_t)user_funcarg;
309 ts64->r8 = (uint64_t)user_stacksize;
310 ts64->r9 = (uint64_t)flags;
311 /*
312 * set stack pointer aligned to 16 byte boundary
313 */
314 ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);
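/*
 * C_64_REDZONE_LEN matches the 128-byte red zone the System V AMD64 ABI
 * reserves below %rsp; backing off by that amount also keeps the initial
 * stack pointer 16-byte aligned, since th_stack is page aligned.
 */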
315
316 error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
317 if (error != KERN_SUCCESS) {
318 error = EINVAL;
319 goto out;
320 }
321
322 }
323 #elif defined(__arm__)
324 arm_thread_state_t state;
325 arm_thread_state_t *ts = &state;
326
327 ts->pc = (int)pthread_kern->proc_get_threadstart(p);
328 ts->r[0] = (unsigned int)th_pthread;
329 ts->r[1] = (unsigned int)th_thport;
330 ts->r[2] = (unsigned int)user_func;
331 ts->r[3] = (unsigned int)user_funcarg;
332 ts->r[4] = (unsigned int)user_stacksize;
333 ts->r[5] = (unsigned int)flags;
334
335 /* Set r7 & lr to 0 for better back tracing */
336 ts->r[7] = 0;
337 ts->lr = 0;
338
339 /*
340 * set stack pointer
341 */
342 ts->sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));
343
344 (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
345
346 #else
347 #error bsdthread_create not defined for this architecture
348 #endif
349
350 if ((flags & PTHREAD_START_SETSCHED) != 0) {
351 /* Set scheduling parameters if needed */
352 thread_extended_policy_data_t extinfo;
353 thread_precedence_policy_data_t precedinfo;
354
355 importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
356 policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
357
358 if (policy == SCHED_OTHER) {
359 extinfo.timeshare = 1;
360 } else {
361 extinfo.timeshare = 0;
362 }
363
364 thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
365
366 #define BASEPRI_DEFAULT 31
367 precedinfo.importance = (importance - BASEPRI_DEFAULT);
368 thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
369 } else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
370 /* Set thread QoS class if requested. */
371 pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);
372
373 thread_qos_policy_data_t qos;
374 qos.qos_tier = pthread_priority_get_qos_class(priority);
375 qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
376 _pthread_priority_get_relpri(priority);
377
378 pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
379 }
380
381 kret = pthread_kern->thread_resume(th);
382 if (kret != KERN_SUCCESS) {
383 error = EINVAL;
384 goto out1;
385 }
386 thread_deallocate(th); /* drop the creator reference */
387
388 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);
389
390 *retval = th_pthread;
391
392 return(0);
393
394 out1:
395 if (allocated != 0) {
396 (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
397 }
398 out:
399 (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
400 (void)thread_terminate(th);
401 (void)thread_deallocate(th);
402 return(error);
403 }
404
405 int
406 _bsdthread_terminate(__unused struct proc *p,
407 user_addr_t stackaddr,
408 size_t size,
409 uint32_t kthport,
410 uint32_t sem,
411 __unused int32_t *retval)
412 {
413 mach_vm_offset_t freeaddr;
414 mach_vm_size_t freesize;
415 kern_return_t kret;
416
417 freeaddr = (mach_vm_offset_t)stackaddr;
418 freesize = size;
419
420 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);
421
422 if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
423 kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
424 if (kret != KERN_SUCCESS) {
425 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
426 return(EINVAL);
427 }
428 }
429
430 (void) thread_terminate(current_thread());
431 if (sem != MACH_PORT_NULL) {
432 kret = pthread_kern->semaphore_signal_internal_trap(sem);
433 if (kret != KERN_SUCCESS) {
434 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
435 return(EINVAL);
436 }
437 }
438
439 if (kthport != MACH_PORT_NULL) {
440 pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
441 }
442
443 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);
444
445 pthread_kern->thread_exception_return();
446 panic("bsdthread_terminate: still running\n");
447
448 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
449
450 return(0);
451 }
452
453 int
454 _bsdthread_register(struct proc *p,
455 user_addr_t threadstart,
456 user_addr_t wqthread,
457 int pthsize,
458 user_addr_t pthread_init_data,
459 user_addr_t targetconc_ptr,
460 uint64_t dispatchqueue_offset,
461 int32_t *retval)
462 {
463 /* prevent multiple registrations */
464 if (pthread_kern->proc_get_register(p) != 0) {
465 return(EINVAL);
466 }
467 /* syscall randomizer test can pass bogus values */
468 if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
469 return(EINVAL);
470 }
471 pthread_kern->proc_set_threadstart(p, threadstart);
472 pthread_kern->proc_set_wqthread(p, wqthread);
473 pthread_kern->proc_set_pthsize(p, pthsize);
474 pthread_kern->proc_set_register(p);
475
476 /* if we have pthread_init_data, then we use that, and targetconc_ptr is reused to carry the size of that data. */
477 if (pthread_init_data != 0) {
478 thread_qos_policy_data_t qos;
479
480 struct _pthread_registration_data data;
481 size_t pthread_init_sz = MIN(sizeof(struct _pthread_registration_data), (size_t)targetconc_ptr);
482
483 kern_return_t kr = copyin(pthread_init_data, &data, pthread_init_sz);
484 if (kr != KERN_SUCCESS) {
485 return EINVAL;
486 }
487
488 /* Incoming data from the data structure */
489 pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);
490
491 /* Outgoing data that userspace expects as a reply */
492 if (pthread_kern->qos_main_thread_active()) {
493 mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
494 boolean_t gd = FALSE;
495
496 kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
497 if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
498 /* An unspecified QoS means the kernel wants us to impose the legacy QoS class upon the thread. */
499 qos.qos_tier = THREAD_QOS_LEGACY;
500 qos.tier_importance = 0;
501
502 kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
503 }
504
505 if (kr == KERN_SUCCESS) {
506 data.main_qos = pthread_qos_class_get_priority(qos.qos_tier);
507 } else {
508 data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
509 }
510 } else {
511 data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
512 }
513
514 kr = copyout(&data, pthread_init_data, pthread_init_sz);
515 if (kr != KERN_SUCCESS) {
516 return EINVAL;
517 }
518 } else {
519 pthread_kern->proc_set_dispatchqueue_offset(p, dispatchqueue_offset);
520 pthread_kern->proc_set_targconc(p, targetconc_ptr);
521 }
522
523 /* return the supported feature set as the return value. */
524 *retval = PTHREAD_FEATURE_SUPPORTED;
525
526 return(0);
527 }
528
529 int
530 _bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
531 {
532 kern_return_t kr;
533 thread_t th;
534
535 pthread_priority_t priority;
536
537 /* Unused parameters must be zero. */
538 if (arg3 != 0) {
539 return EINVAL;
540 }
541
542 /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
543 if (proc_is64bit(p)) {
544 uint64_t v;
545 kr = copyin(tsd_priority_addr, &v, sizeof(v));
546 if (kr != KERN_SUCCESS) {
547 return kr;
548 }
549 priority = (int)(v & 0xffffffff);
550 } else {
551 uint32_t v;
552 kr = copyin(tsd_priority_addr, &v, sizeof(v));
553 if (kr != KERN_SUCCESS) {
554 return kr;
555 }
556 priority = v;
557 }
558
559 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
560 return ESRCH;
561 }
562
563 /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
564 if (th != current_thread()) {
565 thread_deallocate(th);
566 return EPERM;
567 }
568
569 int rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);
570
571 /* Static-param the thread: we just set QoS on it, so it's stuck in QoS land now. */
572 /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details
573
574 thread_deallocate(th);
575
576 return rv;
577 }
578
579 static inline struct threadlist *
580 util_get_thread_threadlist_entry(thread_t th)
581 {
582 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
583 if (uth) {
584 struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
585 return tl;
586 }
587 return NULL;
588 }
589
590 static inline void
591 wq_thread_override_reset(thread_t th)
592 {
593 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
594 struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
595
596 if (tl) {
597 /*
598 * Drop all outstanding overrides on this thread, done outside the wq lock
599 * because proc_usynch_thread_qos_remove_override takes a spinlock that
600 * could cause us to panic.
601 */
602 uint32_t count = tl->th_dispatch_override_count;
603 while (!OSCompareAndSwap(count, 0, &tl->th_dispatch_override_count)) {
604 count = tl->th_dispatch_override_count;
605 }
606
607 PTHREAD_TRACE(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, count, 0, 0, 0);
608
609 for (int i=count; i>0; i--) {
610 pthread_kern->proc_usynch_thread_qos_remove_override(uth, 0);
611 }
612 }
613 }
614
615 int
616 _bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
617 {
618 thread_qos_policy_data_t qos;
619 mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
620 boolean_t gd = FALSE;
621
622 kern_return_t kr;
623 int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;
624
625 if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
626 kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
627 if (kr != KERN_SUCCESS) {
628 qos_rv = EINVAL;
629 goto voucher;
630 }
631
632 /* If we have main-thread QoS then we don't allow a thread to come out of QOS_CLASS_UNSPECIFIED. */
633 if (pthread_kern->qos_main_thread_active() && qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
634 qos_rv = EPERM;
635 goto voucher;
636 }
637
638 /* Get the work queue for tracing, also the threadlist for bucket manipulation. */
639 struct workqueue *wq = NULL;
640 struct threadlist *tl = util_get_thread_threadlist_entry(current_thread());
641 if (tl) {
642 wq = tl->th_workq;
643 }
644
645 PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);
646
647 qos.qos_tier = pthread_priority_get_qos_class(priority);
648 qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);
649
650 kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
651 if (kr != KERN_SUCCESS) {
652 qos_rv = EINVAL;
653 goto voucher;
654 }
655
656 /* If we're a workqueue thread, the threadlist item priority needs adjusting, along with the bucket we were running in. */
657 if (tl) {
658 workqueue_lock_spin(p);
659
660 /* Fix up counters. */
661 uint8_t old_bucket = tl->th_priority;
662 uint8_t new_bucket = pthread_priority_get_class_index(priority);
663
664 uint32_t old_active = OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
665 OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);
666
667 wq->wq_thscheduled_count[old_bucket]--;
668 wq->wq_thscheduled_count[new_bucket]++;
669
670 tl->th_priority = new_bucket;
671
672 /* If we were at the ceiling of non-overcommitted threads for a given bucket, we have to
673 * reevaluate whether we should start more work.
674 */
675 if (old_active == wq->wq_reqconc[old_bucket]) {
676 /* workqueue_run_nextreq will drop the workqueue lock in all exit paths. */
677 (void)workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);
678 } else {
679 workqueue_unlock(p);
680 }
681 }
682
683 PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
684 }
685
686 voucher:
687 if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
688 kr = pthread_kern->thread_set_voucher_name(voucher);
689 if (kr != KERN_SUCCESS) {
690 voucher_rv = ENOENT;
691 goto fixedpri;
692 }
693 }
694
695 fixedpri:
696 if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
697 thread_extended_policy_data_t extpol;
698 thread_t thread = current_thread();
699
700 extpol.timeshare = 0;
701
702 struct threadlist *tl = util_get_thread_threadlist_entry(thread);
703 if (tl) {
704 /* Not allowed on workqueue threads, since there is no symmetric clear function */
705 fixedpri_rv = ENOTSUP;
706 goto done;
707 }
708
709 kr = pthread_kern->thread_policy_set_internal(thread, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
710 if (kr != KERN_SUCCESS) {
711 fixedpri_rv = EINVAL;
712 goto done;
713 }
714 }
715
716 done:
717 if (qos_rv && voucher_rv) {
718 /* Both failed, give that a unique error. */
719 return EBADMSG;
720 }
721
722 if (qos_rv) {
723 return qos_rv;
724 }
725
726 if (voucher_rv) {
727 return voucher_rv;
728 }
729
730 if (fixedpri_rv) {
731 return fixedpri_rv;
732 }
733
734 return 0;
735 }
736
737 int
738 _bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int __unused *retval)
739 {
740 thread_t th;
741 int rv = 0;
742
743 if (arg3 != 0) {
744 return EINVAL;
745 }
746
747 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
748 return ESRCH;
749 }
750
751 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
752 int override_qos = pthread_priority_get_qos_class(priority);
753
754 struct threadlist *tl = util_get_thread_threadlist_entry(th);
755 if (tl) {
756 /* Workqueue threads count their overrides, so they can forcibly balance any outstanding
757 * overrides when they return to the kernel.
758 */
759 uint32_t o = OSAddAtomic(1, &tl->th_override_count);
760 PTHREAD_TRACE(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o+1, priority, 0);
761 }
762
763 /* The only failure case here would be passing a tid and having it look up the thread; since we pass the uthread directly, this always succeeds. */
764 pthread_kern->proc_usynch_thread_qos_add_override(uth, 0, override_qos, TRUE);
765
766 thread_deallocate(th);
767 return rv;
768 }
769
770 int
771 _bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t arg2, user_addr_t arg3, int __unused *retval)
772 {
773 thread_t th;
774 int rv = 0;
775
776 if (arg2 != 0 || arg3 != 0) {
777 return EINVAL;
778 }
779
780 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
781 return ESRCH;
782 }
783
784 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
785
786 struct threadlist *tl = util_get_thread_threadlist_entry(th);
787 if (tl) {
788 uint32_t o = OSAddAtomic(-1, &tl->th_override_count);
789
790 PTHREAD_TRACE(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o-1, 0, 0);
791
792 if (o == 0) {
793 /* underflow! */
794 thread_deallocate(th);
795 return EFAULT;
796 }
797 }
798
799 pthread_kern->proc_usynch_thread_qos_remove_override(uth, 0);
800
801 thread_deallocate(th);
802 return rv;
803 }
804
805 int
806 _bsdthread_ctl_qos_override_dispatch(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int __unused *retval)
807 {
808 thread_t th;
809 int rv = 0;
810
811 if (arg3 != 0) {
812 return EINVAL;
813 }
814
815 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
816 return ESRCH;
817 }
818
819 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
820 int override_qos = pthread_priority_get_qos_class(priority);
821
822 struct threadlist *tl = util_get_thread_threadlist_entry(th);
823 if (!tl) {
824 thread_deallocate(th);
825 return EPERM;
826 }
827
828 /* Workqueue threads count their overrides, so they can forcibly balance any outstanding
829 * overrides when they return to the kernel.
830 */
831 uint32_t o = OSAddAtomic(1, &tl->th_dispatch_override_count);
832 PTHREAD_TRACE(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o+1, priority, 0);
833
834 /* The only failure case here would be passing a tid and having it look up the thread; since we pass the uthread directly, this always succeeds. */
835 pthread_kern->proc_usynch_thread_qos_add_override(uth, 0, override_qos, TRUE);
836
837 thread_deallocate(th);
838 return rv;
839 }
840
841 int
842 _bsdthread_ctl_qos_override_reset(struct proc __unused *p, user_addr_t __unused cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int __unused *retval)
843 {
844 thread_t th;
845 struct threadlist *tl;
846 int rv = 0;
847
848 if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
849 return EINVAL;
850 }
851
852 th = current_thread();
853 tl = util_get_thread_threadlist_entry(th);
854
855 if (tl) {
856 wq_thread_override_reset(th);
857 } else {
858 rv = EPERM;
859 }
860
861 return rv;
862 }
863
864 int
865 _bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
866 {
867 switch (cmd) {
868 case BSDTHREAD_CTL_SET_QOS:
869 return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
870 case BSDTHREAD_CTL_QOS_OVERRIDE_START:
871 return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
872 case BSDTHREAD_CTL_QOS_OVERRIDE_END:
873 return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
874 case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
875 return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
876 case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
877 return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
878 case BSDTHREAD_CTL_SET_SELF:
879 return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
880 default:
881 return EINVAL;
882 }
883 }
884
885 uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD;
886 uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS;
887 uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS;
888 uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS;
889 uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS;
890 uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS;
891 uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8;
892
893
894 SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
895 &wq_yielded_threshold, 0, "");
896
897 SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
898 &wq_yielded_window_usecs, 0, "");
899
900 SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
901 &wq_stalled_window_usecs, 0, "");
902
903 SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
904 &wq_reduce_pool_window_usecs, 0, "");
905
906 SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
907 &wq_max_timer_interval_usecs, 0, "");
908
909 SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
910 &wq_max_threads, 0, "");
911
912 SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
913 &wq_max_constrained_threads, 0, "");
914
915
916 static uint32_t wq_init_constrained_limit = 1;
917
918
919 void
920 _workqueue_init_lock(proc_t p)
921 {
922 lck_spin_init(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp, pthread_lck_attr);
923 *(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;
924 }
925
926 void
927 _workqueue_destroy_lock(proc_t p)
928 {
929 lck_spin_destroy(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp);
930 }
931
932
933 static void
934 workqueue_lock_spin(proc_t p)
935 {
936 lck_spin_lock(pthread_kern->proc_get_wqlockptr(p));
937 }
938
939 static void
940 workqueue_unlock(proc_t p)
941 {
942 lck_spin_unlock(pthread_kern->proc_get_wqlockptr(p));
943 }
944
945
946 static void
947 workqueue_interval_timer_start(struct workqueue *wq)
948 {
949 uint64_t deadline;
950
951 if (wq->wq_timer_interval == 0) {
952 wq->wq_timer_interval = wq_stalled_window_usecs;
953
954 } else {
955 wq->wq_timer_interval = wq->wq_timer_interval * 2;
956
957 if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
958 wq->wq_timer_interval = wq_max_timer_interval_usecs;
959 }
960 }
961 clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);
962
963 thread_call_enter_delayed(wq->wq_atimer_call, deadline);
964
965 PTHREAD_TRACE(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
966 }
967
968
969 static boolean_t
970 wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
971 {
972 clock_sec_t secs;
973 clock_usec_t usecs;
974 uint64_t lastblocked_ts;
975 uint64_t elapsed;
976
977 /*
978 * the timestamp is updated atomically w/o holding the workqueue lock
979 * so we need to do an atomic read of the 64 bits so that we don't see
980 * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
981 * independent fashion by using OSCompareAndSwap64 to write back the
982 * value we grabbed... if it succeeds, then we have a good timestamp to
983 * evaluate... if it fails, we straddled grabbing the timestamp while it
984 * was being updated... treat a failed update as a busy thread since
985 * it implies we are about to see a really fresh timestamp anyway
986 */
987 lastblocked_ts = *lastblocked_tsp;
988
989 if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
990 return (TRUE);
991
992 if (lastblocked_ts >= cur_ts) {
993 /*
994 * because the update of the timestamp when a thread blocks isn't
995 * serialized against us looking at it (i.e. we don't hold the workq lock)
996 * it's possible to have a timestamp that matches the current time or
997 * that even looks to be in the future relative to when we grabbed the current
998 * time... just treat this as a busy thread since it must have just blocked.
999 */
1000 return (TRUE);
1001 }
1002 elapsed = cur_ts - lastblocked_ts;
1003
1004 pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
1005
1006 if (secs == 0 && usecs < wq_stalled_window_usecs)
1007 return (TRUE);
1008 return (FALSE);
1009 }
1010
1011
1012 #define WQ_TIMER_NEEDED(wq, start_timer) do { \
1013 int oldflags = wq->wq_flags; \
1014 \
1015 if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) { \
1016 if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
1017 start_timer = TRUE; \
1018 } \
1019 } while (0)
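/*
 * WQ_TIMER_NEEDED only claims WQ_ATIMER_RUNNING when neither WQ_EXITING nor
 * WQ_ATIMER_RUNNING was set in the snapshot of wq_flags; if the
 * OSCompareAndSwap loses a race, start_timer is left untouched and the
 * caller simply skips arming the timer on this pass.
 */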
1020
1021
1022
1023 static void
1024 workqueue_add_timer(struct workqueue *wq, __unused int param1)
1025 {
1026 proc_t p;
1027 boolean_t start_timer = FALSE;
1028 boolean_t retval;
1029 boolean_t add_thread;
1030 uint32_t busycount;
1031
1032 PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);
1033
1034 p = wq->wq_proc;
1035
1036 workqueue_lock_spin(p);
1037
1038 /*
1039 * because workqueue_callback now runs w/o taking the workqueue lock
1040 * we are unsynchronized w/r to a change in state of the running threads...
1041 * to make sure we always evaluate that change, we allow it to start up
1042 * a new timer if the current one is actively evaluating the state
1043 * however, we do not need more than 2 timers fired up (1 active and 1 pending)
1044 * and we certainly do not want 2 active timers evaluating the state
1045 * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
1046 * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
1047 * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
1048 * and set atomically since the callback function needs to manipulate it
1049 * w/o holding the workq lock...
1050 *
1051 * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY == no pending timer, no active timer
1052 * !WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY == no pending timer, 1 active timer
1053 * WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY == 1 pending timer, no active timer
1054 * WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY == 1 pending timer, 1 active timer
1055 */
1056 while (wq->wq_lflags & WQL_ATIMER_BUSY) {
1057 wq->wq_lflags |= WQL_ATIMER_WAITING;
1058
1059 assert_wait((caddr_t)wq, (THREAD_UNINT));
1060 workqueue_unlock(p);
1061
1062 thread_block(THREAD_CONTINUE_NULL);
1063
1064 workqueue_lock_spin(p);
1065 }
1066 wq->wq_lflags |= WQL_ATIMER_BUSY;
1067
1068 /*
1069 * the workq lock will protect us from seeing WQ_EXITING change state, but we
1070 * still need to update this atomically in case someone else tries to start
1071 * the timer just as we're releasing it
1072 */
1073 while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));
1074
1075 again:
1076 retval = TRUE;
1077 add_thread = FALSE;
1078
1079 if ( !(wq->wq_flags & WQ_EXITING)) {
1080 /*
1081 * check to see if the stall frequency was beyond our tolerance
1082 * or we have work on the queue, but haven't scheduled any
1083 * new work within our acceptable time interval because
1084 * there were no idle threads left to schedule
1085 */
1086 if (wq->wq_reqcount) {
1087 uint32_t priclass;
1088 uint32_t thactive_count;
1089 uint32_t i;
1090 uint64_t curtime;
1091
1092 for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
1093 if (wq->wq_requests[priclass])
1094 break;
1095 }
1096 assert(priclass < WORKQUEUE_NUM_BUCKETS);
1097
1098 curtime = mach_absolute_time();
1099 busycount = 0;
1100 thactive_count = 0;
1101
1102 /*
1103 * check for conditions under which we would not add a thread, either
1104 * a) we've got as many running threads as we want in this priority
1105 * band and the priority bands above it
1106 *
1107 * b) check to see if the priority group has blocked threads, if the
1108 * last blocked timestamp is old enough, we will have already passed
1109 * (a) where we would have stopped if we had enough active threads.
1110 */
1111 for (i = 0; i <= priclass; i++) {
1112
1113 thactive_count += wq->wq_thactive_count[i];
1114
1115 if (wq->wq_thscheduled_count[i]) {
1116 if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i]))
1117 busycount++;
1118 }
1119 }
1120 if (thactive_count + busycount < wq->wq_max_concurrency) {
1121
1122 if (wq->wq_thidlecount == 0) {
1123 /*
1124 * if we have no idle threads, try to add one
1125 */
1126 retval = workqueue_addnewthread(wq, FALSE);
1127 }
1128 add_thread = TRUE;
1129 }
1130
1131 if (wq->wq_reqcount) {
1132 /*
1133 * as long as we have threads to schedule, and we successfully
1134 * scheduled new work, keep trying
1135 */
1136 while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
1137 /*
1138 * workqueue_run_nextreq is responsible for
1139 * dropping the workqueue lock in all cases
1140 */
1141 retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);
1142 workqueue_lock_spin(p);
1143
1144 if (retval == FALSE)
1145 break;
1146 }
1147 if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {
1148
1149 if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
1150 goto again;
1151
1152 if (wq->wq_thidlecount == 0 || busycount)
1153 WQ_TIMER_NEEDED(wq, start_timer);
1154
1155 PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);
1156 }
1157 }
1158 }
1159 }
1160 if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
1161 wq->wq_timer_interval = 0;
1162
1163 wq->wq_lflags &= ~WQL_ATIMER_BUSY;
1164
1165 if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
1166 /*
1167 * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer
1168 * to finish getting out of the way
1169 */
1170 wq->wq_lflags &= ~WQL_ATIMER_WAITING;
1171 wakeup(wq);
1172 }
1173
1174 PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);
1175
1176 workqueue_unlock(p);
1177
1178 if (start_timer == TRUE)
1179 workqueue_interval_timer_start(wq);
1180 }
1181
1182
1183 void
1184 _workqueue_thread_yielded(void)
1185 {
1186 struct workqueue *wq;
1187 proc_t p;
1188
1189 p = current_proc();
1190
1191 if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL || wq->wq_reqcount == 0)
1192 return;
1193
1194 workqueue_lock_spin(p);
1195
1196 if (wq->wq_reqcount) {
1197 uint64_t curtime;
1198 uint64_t elapsed;
1199 clock_sec_t secs;
1200 clock_usec_t usecs;
1201
1202 if (wq->wq_thread_yielded_count++ == 0)
1203 wq->wq_thread_yielded_timestamp = mach_absolute_time();
1204
1205 if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
1206 workqueue_unlock(p);
1207 return;
1208 }
1209
1210 PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);
1211
1212 wq->wq_thread_yielded_count = 0;
1213
1214 curtime = mach_absolute_time();
1215 elapsed = curtime - wq->wq_thread_yielded_timestamp;
1216 pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
1217
1218 if (secs == 0 && usecs < wq_yielded_window_usecs) {
1219
1220 if (wq->wq_thidlecount == 0) {
1221 workqueue_addnewthread(wq, TRUE);
1222 /*
1223 * 'workqueue_addnewthread' drops the workqueue lock
1224 * when creating the new thread and then retakes it before
1225 * returning... this window allows other threads to process
1226 * requests, so we need to recheck for available work
1227 * if none found, we just return... the newly created thread
1228 * will eventually get used (if it hasn't already)...
1229 */
1230 if (wq->wq_reqcount == 0) {
1231 workqueue_unlock(p);
1232 return;
1233 }
1234 }
1235 if (wq->wq_thidlecount) {
1236 uint32_t priority;
1237 boolean_t overcommit = FALSE;
1238 boolean_t force_oc = FALSE;
1239
1240 for (priority = 0; priority < WORKQUEUE_NUM_BUCKETS; priority++) {
1241 if (wq->wq_requests[priority]) {
1242 break;
1243 }
1244 }
1245 assert(priority < WORKQUEUE_NUM_BUCKETS);
1246
1247 wq->wq_reqcount--;
1248 wq->wq_requests[priority]--;
1249
1250 if (wq->wq_ocrequests[priority]) {
1251 wq->wq_ocrequests[priority]--;
1252 overcommit = TRUE;
1253 } else
1254 force_oc = TRUE;
1255
1256 (void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, pthread_priority_from_class_index(priority));
1257 /*
1258 * workqueue_run_nextreq is responsible for
1259 * dropping the workqueue lock in all cases
1260 */
1261 PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);
1262
1263 return;
1264 }
1265 }
1266 PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);
1267 }
1268 workqueue_unlock(p);
1269 }
1270
1271
1272
1273 static void
1274 workqueue_callback(int type, thread_t thread)
1275 {
1276 struct uthread *uth;
1277 struct threadlist *tl;
1278 struct workqueue *wq;
1279
1280 uth = pthread_kern->get_bsdthread_info(thread);
1281 tl = pthread_kern->uthread_get_threadlist(uth);
1282 wq = tl->th_workq;
1283
1284 switch (type) {
1285 case SCHED_CALL_BLOCK: {
1286 uint32_t old_activecount;
1287 boolean_t start_timer = FALSE;
1288
1289 old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
1290
1291 if (old_activecount == wq->wq_reqconc[tl->th_priority]) {
1292 uint64_t curtime;
1293 UInt64 *lastblocked_ptr;
1294
1295 /*
1296 * the number of active threads at this priority
1297 * has fallen below the maximum number of concurrent
1298 * threads that we're allowed to run
1299 */
1300 lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority];
1301 curtime = mach_absolute_time();
1302
1303 /*
1304 * if we collide with another thread trying to update the last_blocked (really unlikely
1305 * since another thread would have to get scheduled and then block after we start down
1306 * this path), it's not a problem. Either timestamp is adequate, so no need to retry
1307 */
1308
1309 OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
1310
1311 if (wq->wq_reqcount) {
1312 /*
1313 * we have work to do so start up the timer
1314 * if it's not running... we'll let it sort
1315 * out whether we really need to start up
1316 * another thread
1317 */
1318 WQ_TIMER_NEEDED(wq, start_timer);
1319 }
1320
1321 if (start_timer == TRUE) {
1322 workqueue_interval_timer_start(wq);
1323 }
1324 }
1325 PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_START, wq, old_activecount, tl->th_priority, start_timer, thread_tid(thread));
1326 break;
1327 }
1328 case SCHED_CALL_UNBLOCK:
1329 /*
1330 * we cannot take the workqueue_lock here...
1331 * an UNBLOCK can occur from a timer event which
1332 * is run from an interrupt context... if the workqueue_lock
1333 * is already held by this processor, we'll deadlock...
1334 * the thread lock for the thread being UNBLOCKED
1335 * is also held
1336 */
1337 OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority]);
1338
1339 PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, 0, thread_tid(thread));
1340
1341 break;
1342 }
1343 }
1344
1345 sched_call_t
1346 _workqueue_get_sched_callback(void)
1347 {
1348 return workqueue_callback;
1349 }
1350
1351 static void
1352 workqueue_removethread(struct threadlist *tl, int fromexit)
1353 {
1354 struct workqueue *wq;
1355 struct uthread * uth;
1356
1357 /*
1358 * If fromexit is set, the call is from workqueue_exit(),
1359 * so some cleanups are to be avoided.
1360 */
1361 wq = tl->th_workq;
1362
1363 TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
1364
1365 if (fromexit == 0) {
1366 wq->wq_nthreads--;
1367 wq->wq_thidlecount--;
1368 }
1369
1370 /*
1371 * Clear the threadlist pointer in uthread so
1372 * a blocked thread, when woken for termination, will
1373 * not access the thread list as it is about to be
1374 * freed.
1375 */
1376 pthread_kern->thread_sched_call(tl->th_thread, NULL);
1377
1378 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1379 if (uth != (struct uthread *)0) {
1380 pthread_kern->uthread_set_threadlist(uth, NULL);
1381 }
1382 if (fromexit == 0) {
1383 /* during exit the lock is not held */
1384 workqueue_unlock(wq->wq_proc);
1385 }
1386
1387 if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
1388 /*
1389 * thread was created, but never used...
1390 * need to clean up the stack and port ourselves
1391 * since we're not going to spin up through the
1392 * normal exit path triggered from Libc
1393 */
1394 if (fromexit == 0) {
1395 /* vm map is already deallocated when this is called from exit */
1396 (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
1397 }
1398 (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
1399
1400 PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
1401 } else {
1402
1403 PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
1404 }
1405 /*
1406 * drop our ref on the thread
1407 */
1408 thread_deallocate(tl->th_thread);
1409
1410 kfree(tl, sizeof(struct threadlist));
1411 }
1412
1413
1414 /*
1415 * called with workq lock held
1416 * dropped and retaken around thread creation
1417 * return with workq lock held
1418 */
1419 static boolean_t
1420 workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)
1421 {
1422 struct threadlist *tl;
1423 struct uthread *uth;
1424 kern_return_t kret;
1425 thread_t th;
1426 proc_t p;
1427 void *sright;
1428 mach_vm_offset_t stackaddr;
1429 mach_vm_size_t guardsize;
1430
1431 if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING)
1432 return (FALSE);
1433
1434 if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (pthread_kern->config_thread_max - 20)) {
1435 wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
1436 return (FALSE);
1437 }
1438 wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
1439
1440 if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
1441 /*
1442 * if we're not creating this thread to service an overcommit request,
1443 * then check the size of the constrained thread pool... if we've already
1444 * reached our max for threads scheduled from this pool, don't create a new
1445 * one... the callers of this function are prepared for failure.
1446 */
1447 wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
1448 return (FALSE);
1449 }
1450 if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
1451 wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
1452
1453 wq->wq_nthreads++;
1454
1455 p = wq->wq_proc;
1456 workqueue_unlock(p);
1457
1458 kret = pthread_kern->thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);
1459 if (kret != KERN_SUCCESS) {
1460 goto failed;
1461 }
1462
1463 tl = kalloc(sizeof(struct threadlist));
1464 bzero(tl, sizeof(struct threadlist));
1465
1466 #if defined(__i386__) || defined(__x86_64__)
1467 stackaddr = 0xB0000000;
1468 #else
1469 #error Need to define a stack address hint for this architecture
1470 #endif
1471
1472 guardsize = vm_map_page_size(wq->wq_map);
1473 tl->th_allocsize = PTH_DEFAULT_STACKSIZE + guardsize + pthread_kern->proc_get_pthsize(p);
1474
1475 kret = mach_vm_map(wq->wq_map, &stackaddr,
1476 tl->th_allocsize,
1477 page_size-1,
1478 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
1479 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
1480 VM_INHERIT_DEFAULT);
1481
1482 if (kret != KERN_SUCCESS) {
1483 kret = mach_vm_allocate(wq->wq_map,
1484 &stackaddr, tl->th_allocsize,
1485 VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
1486 }
1487 if (kret == KERN_SUCCESS) {
1488 /*
1489 * The guard page is at the lowest address
1490 * The stack base is the highest address
1491 */
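/*
 * Same shape as the stack created in _bsdthread_create(): guard page at the
 * bottom of the mapping, PTH_DEFAULT_STACKSIZE of stack above it, and the
 * pthread_t/TSD area at the top of the th_allocsize region.
 */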
1492 kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
1493
1494 if (kret != KERN_SUCCESS)
1495 (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
1496 }
1497 if (kret != KERN_SUCCESS) {
1498 (void) thread_terminate(th);
1499 thread_deallocate(th);
1500
1501 kfree(tl, sizeof(struct threadlist));
1502 goto failed;
1503 }
1504 thread_reference(th);
1505
1506 sright = (void *)pthread_kern->convert_thread_to_port(th);
1507 tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(wq->wq_task));
1508
1509 pthread_kern->thread_static_param(th, TRUE);
1510
1511 tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;
1512
1513 tl->th_thread = th;
1514 tl->th_workq = wq;
1515 tl->th_stackaddr = stackaddr;
1516 tl->th_priority = WORKQUEUE_NUM_BUCKETS;
1517 tl->th_policy = -1;
1518
1519 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1520
1521 workqueue_lock_spin(p);
1522
1523 pthread_kern->uthread_set_threadlist(uth, tl);
1524 TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
1525
1526 wq->wq_thidlecount++;
1527
1528 PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread));
1529
1530 return (TRUE);
1531
1532 failed:
1533 workqueue_lock_spin(p);
1534 wq->wq_nthreads--;
1535
1536 return (FALSE);
1537 }
1538
1539
1540 int
1541 _workq_open(struct proc *p, __unused int32_t *retval)
1542 {
1543 struct workqueue * wq;
1544 int wq_size;
1545 char * ptr;
1546 uint32_t i;
1547 uint32_t num_cpus;
1548 int error = 0;
1549 boolean_t need_wakeup = FALSE;
1550
1551 if (pthread_kern->proc_get_register(p) == 0) {
1552 return EINVAL;
1553 }
1554
1555 num_cpus = pthread_kern->ml_get_max_cpus();
1556
1557 if (wq_init_constrained_limit) {
1558 uint32_t limit;
1559 /*
1560 * set up the limit for the constrained pool
1561 * this is a virtual pool in that we don't
1562 * maintain it on a separate idle and run list
1563 */
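/*
 * This one-time computation can only raise wq_max_constrained_threads
 * above its boot default of WORKQUEUE_MAXTHREADS / 8: with N logical
 * CPUs reported by ml_get_max_cpus(), the floor becomes
 * N * WORKQUEUE_CONSTRAINED_FACTOR.
 */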
1564 limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
1565
1566 if (limit > wq_max_constrained_threads)
1567 wq_max_constrained_threads = limit;
1568
1569 wq_init_constrained_limit = 0;
1570 }
1571 workqueue_lock_spin(p);
1572
1573 if (pthread_kern->proc_get_wqptr(p) == NULL) {
1574
1575 while (*pthread_kern->proc_get_wqinitingptr(p) == TRUE) {
1576
1577 assert_wait((caddr_t)pthread_kern->proc_get_wqinitingptr(p), THREAD_UNINT);
1578 workqueue_unlock(p);
1579
1580 thread_block(THREAD_CONTINUE_NULL);
1581
1582 workqueue_lock_spin(p);
1583 }
1584 if (pthread_kern->proc_get_wqptr(p) != NULL) {
1585 goto out;
1586 }
1587
1588 *(pthread_kern->proc_get_wqinitingptr(p)) = TRUE;
1589
1590 workqueue_unlock(p);
1591
1592 wq_size = sizeof(struct workqueue);
1593
1594 ptr = (char *)kalloc(wq_size);
1595 bzero(ptr, wq_size);
1596
1597 wq = (struct workqueue *)ptr;
1598 wq->wq_flags = WQ_LIST_INITED;
1599 wq->wq_proc = p;
1600 wq->wq_max_concurrency = num_cpus;
1601 wq->wq_task = current_task();
1602 wq->wq_map = pthread_kern->current_map();
1603
1604 for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++)
1605 wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency;
1606
1607 TAILQ_INIT(&wq->wq_thrunlist);
1608 TAILQ_INIT(&wq->wq_thidlelist);
1609
1610 wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);
1611
1612 workqueue_lock_spin(p);
1613
1614 pthread_kern->proc_set_wqptr(p, wq);
1615 pthread_kern->proc_set_wqsize(p, wq_size);
1616
1617 *(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;
1618 need_wakeup = TRUE;
1619 }
1620 out:
1621 workqueue_unlock(p);
1622
1623 if (need_wakeup == TRUE) {
1624 wakeup(pthread_kern->proc_get_wqinitingptr(p));
1625 }
1626 return(error);
1627 }
1628
1629
1630 int
1631 _workq_kernreturn(struct proc *p,
1632 int options,
1633 __unused user_addr_t item,
1634 int arg2,
1635 int arg3,
1636 __unused int32_t *retval)
1637 {
1638 struct workqueue *wq;
1639 int error = 0;
1640
1641 if (pthread_kern->proc_get_register(p) == 0) {
1642 return EINVAL;
1643 }
1644
1645 switch (options) {
1646 case WQOPS_QUEUE_NEWSPISUPP: {
1647 /*
1648 * arg2 = offset of serialno into dispatch queue
1649 */
1650 int offset = arg2;
1651
1652 pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
1653 break;
1654 }
1655 case WQOPS_QUEUE_REQTHREADS: {
1656 /*
1657 * arg2 = number of threads to start
1658 * arg3 = priority
1659 */
1660 boolean_t overcommit = FALSE;
1661 int reqcount = arg2;
1662 pthread_priority_t priority = arg3;
1663 int class;
1664
1665 overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0;
1666 class = pthread_priority_get_class_index(priority);
1667
1668 if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS)) {
1669 error = EINVAL;
1670 break;
1671 }
1672
1673 workqueue_lock_spin(p);
1674
1675 if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
1676 workqueue_unlock(p);
1677
1678 error = EINVAL;
1679 break;
1680 }
1681
1682 if (!overcommit) {
1683 wq->wq_reqcount += reqcount;
1684 wq->wq_requests[class] += reqcount;
1685
1686 PTHREAD_TRACE(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1687
1688 while (wq->wq_reqcount) {
1689 if (!workqueue_run_one(p, wq, overcommit, priority))
1690 break;
1691 }
1692 } else {
1693 PTHREAD_TRACE(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1694
1695 while (reqcount) {
1696 if (!workqueue_run_one(p, wq, overcommit, priority))
1697 break;
1698 reqcount--;
1699 }
1700 if (reqcount) {
1701 /*
1702 * we need to delay starting some of the overcommit requests...
1703 * we should only fail to create the overcommit threads if
1704 * we're at the max thread limit... as existing threads
1705 * return to the kernel, we'll notice the ocrequests
1706 * and spin them back to user space as the overcommit variety
1707 */
1708 wq->wq_reqcount += reqcount;
1709 wq->wq_requests[class] += reqcount;
1710 wq->wq_ocrequests[class] += reqcount;
1711
1712 PTHREAD_TRACE(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1713 }
1714 }
1715 workqueue_unlock(p);
1716 break;
1717 }
1718
1719 case WQOPS_THREAD_RETURN: {
1720 thread_t th = current_thread();
1721 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
1722 struct threadlist *tl = util_get_thread_threadlist_entry(th);
1723
1724 /* reset signal mask on the workqueue thread to default state */
1725 if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
1726 pthread_kern->proc_lock(p);
1727 pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
1728 pthread_kern->proc_unlock(p);
1729 }
1730
1731 /* dropping WQ override counts has to be done outside the wq lock. */
1732 wq_thread_override_reset(th);
1733
1734 workqueue_lock_spin(p);
1735
1736 if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL || !tl) {
1737 workqueue_unlock(p);
1738
1739 error = EINVAL;
1740 break;
1741 }
1742 PTHREAD_TRACE(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0);
1743
1744
1745 (void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0);
1746 /*
1747 * workqueue_run_nextreq is responsible for
1748 * dropping the workqueue lock in all cases
1749 */
1750 break;
1751 }
1752
1753 default:
1754 error = EINVAL;
1755 break;
1756 }
1757 return (error);
1758 }
1759
1760 /*
1761 * Routine: workqueue_mark_exiting
1762 *
1763 * Function: Mark the work queue such that new threads will not be added to the
1764 * work queue after we return.
1765 *
1766 * Conditions: Called against the current process.
1767 */
1768 void
1769 _workqueue_mark_exiting(struct proc *p)
1770 {
1771 struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
1772
1773 if (wq != NULL) {
1774
1775 PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
1776
1777 workqueue_lock_spin(p);
1778
1779 /*
1780 * we now arm the timer in the callback function w/o holding the workq lock...
1781 * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
1782 * ensure only a single timer is running and to notice that WQ_EXITING has
1783 * been set (we don't want to start a timer once WQ_EXITING is posted)
1784 *
1785 * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
1786 * therefore there is no need to clear the timer state atomically from the flags
1787 *
1788 * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING,
1789 * the check for it and the sleep until it is clear are protected
1790 */
1791 while (!(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));
1792
1793 if (wq->wq_flags & WQ_ATIMER_RUNNING) {
1794 if (thread_call_cancel(wq->wq_atimer_call) == TRUE) {
1795 wq->wq_flags &= ~WQ_ATIMER_RUNNING;
1796 }
1797 }
1798 while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {
1799 assert_wait((caddr_t)wq, (THREAD_UNINT));
1800 workqueue_unlock(p);
1801
1802 thread_block(THREAD_CONTINUE_NULL);
1803
1804 workqueue_lock_spin(p);
1805 }
1806 workqueue_unlock(p);
1807
1808 PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
1809 }
1810 }
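/*
 * Illustrative sketch (not part of this file): the compare-and-swap retry
 * loop used above to set WQ_EXITING, restated with C11 atomics standing in
 * for OSCompareAndSwap.  SKETCH_WQ_EXITING is a stand-in bit, not the real
 * flag value.
 */
#include <stdatomic.h>
#include <stdint.h>

#define SKETCH_WQ_EXITING 0x1u

static void
sketch_mark_exiting(_Atomic uint32_t *flags)
{
	uint32_t old = atomic_load(flags);

	/* retry until the flag word is updated in one atomic step;
	 * on failure 'old' is refreshed with the current value */
	while (!atomic_compare_exchange_weak(flags, &old, old | SKETCH_WQ_EXITING)) {
		;
	}
}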
1811
1812 /*
1813 * Routine: workqueue_exit
1814 *
1815 * Function: clean up the work queue structure(s) now that there are no threads
1816 * left running inside the work queue (except possibly current_thread).
1817 *
1818 * Conditions: Called by the last thread in the process.
1819 * Called against current process.
1820 */
1821 void
1822 _workqueue_exit(struct proc *p)
1823 {
1824 struct workqueue * wq;
1825 struct threadlist * tl, *tlist;
1826 struct uthread *uth;
1827 int wq_size = 0;
1828
1829 wq = pthread_kern->proc_get_wqptr(p);
1830 if (wq != NULL) {
1831
1832 PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
1833
1834 wq_size = pthread_kern->proc_get_wqsize(p);
1835 pthread_kern->proc_set_wqptr(p, NULL);
1836 pthread_kern->proc_set_wqsize(p, 0);
1837
1838 /*
1839 * Clean up workqueue data structures for threads that exited and
1840 * didn't get a chance to clean up after themselves.
1841 */
1842 TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
1843 pthread_kern->thread_sched_call(tl->th_thread, NULL);
1844
1845 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1846 if (uth != (struct uthread *)0) {
1847 pthread_kern->uthread_set_threadlist(uth, NULL);
1848 }
1849 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1850
1851 /*
1852 * drop our last ref on the thread
1853 */
1854 thread_deallocate(tl->th_thread);
1855
1856 kfree(tl, sizeof(struct threadlist));
1857 }
1858 TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
1859 workqueue_removethread(tl, 1);
1860 }
1861 thread_call_free(wq->wq_atimer_call);
1862
1863 kfree(wq, wq_size);
1864
1865 PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
1866 }
1867 }
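/*
 * Illustrative sketch (not part of this file): the TAILQ_FOREACH_SAFE
 * unlink-and-free pattern used by _workqueue_exit, shown on a toy list so
 * it compiles stand-alone in user space.
 */
#include <stdlib.h>
#include <sys/queue.h>

struct sketch_node {
	TAILQ_ENTRY(sketch_node) entry;
};
TAILQ_HEAD(sketch_head, sketch_node);

static void
sketch_drain(struct sketch_head *head)
{
	struct sketch_node *n, *tmp;

	/* 'tmp' caches the next element, so freeing 'n' is safe mid-iteration */
	TAILQ_FOREACH_SAFE(n, head, entry, tmp) {
		TAILQ_REMOVE(head, n, entry);
		free(n);
	}
}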
1868
1869
1870 static boolean_t
1871 workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority)
1872 {
1873 boolean_t ran_one;
1874
1875 if (wq->wq_thidlecount == 0) {
1876 if (overcommit == FALSE) {
1877 if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency)
1878 workqueue_addnewthread(wq, overcommit);
1879 } else {
1880 workqueue_addnewthread(wq, overcommit);
1881
1882 if (wq->wq_thidlecount == 0)
1883 return (FALSE);
1884 }
1885 }
1886 ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority);
1887 /*
1888 * workqueue_run_nextreq is responsible for
1889 * dropping the workqueue lock in all cases
1890 */
1891 workqueue_lock_spin(p);
1892
1893 return (ran_one);
1894 }
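/*
 * Illustrative sketch (not part of this file): the lock hand-off convention
 * above -- the callee drops the lock on every path, so the caller re-takes
 * it before the next iteration.  A pthread mutex stands in for the workq
 * spin lock; the function names are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>

static bool
sketch_run_next(pthread_mutex_t *lock)
{
	/* ... consume one request ... */
	pthread_mutex_unlock(lock);               /* callee always drops the lock */
	return true;
}

static void
sketch_drain_requests(pthread_mutex_t *lock, int pending)
{
	while (pending-- > 0) {
		pthread_mutex_lock(lock);
		if (!sketch_run_next(lock))       /* lock is dropped on every path */
			break;
	}
}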
1895
1896
1897
1898 /*
1899 * workqueue_run_nextreq:
1900 * called with the workqueue lock held...
1901 * responsible for dropping it in all cases
1902 */
1903 static boolean_t
1904 workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread,
1905 boolean_t force_oc, boolean_t overcommit, pthread_priority_t oc_prio)
1906 {
1907 thread_t th_to_run = THREAD_NULL;
1908 thread_t th_to_park = THREAD_NULL;
1909 int wake_thread = 0;
1910 int reuse_thread = WQ_FLAG_THREAD_REUSE;
1911 uint32_t priclass, orig_class;
1912 uint32_t us_to_wait;
1913 struct threadlist *tl = NULL;
1914 struct uthread *uth = NULL;
1915 boolean_t start_timer = FALSE;
1916 boolean_t adjust_counters = TRUE;
1917 uint64_t curtime;
1918 uint32_t thactive_count;
1919 uint32_t busycount;
1920
1921 PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_reqcount, 0);
1922
1923 if (thread != THREAD_NULL) {
1924 uth = pthread_kern->get_bsdthread_info(thread);
1925
1926 if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) {
1927 panic("wq thread with no threadlist");
1928 }
1929 }
1930
1931 /*
1932 * from here until we drop the workq lock
1933 * we can't be pre-empted since we hold
1934 * the lock in spin mode... this is important
1935 * since we have to independently update the priority that
1936 * the thread is associated with and the priority-based
1937 * counters that "workqueue_callback" also changes and bases
1938 * decisions on.
1939 */
1940 dispatch_overcommit:
1941
1942 if (overcommit || force_oc) {
1943 priclass = pthread_priority_get_class_index(oc_prio);
1944
1945 if (thread != THREAD_NULL) {
1946 th_to_run = thread;
1947 goto pick_up_work;
1948 }
1949 goto grab_idle_thread;
1950 }
1951 if (wq->wq_reqcount) {
1952 for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
1953 if (wq->wq_requests[priclass])
1954 break;
1955 }
1956 assert(priclass < WORKQUEUE_NUM_BUCKETS);
1957
1958 if (wq->wq_ocrequests[priclass] && (thread != THREAD_NULL || wq->wq_thidlecount)) {
1959 /*
1960 * handle delayed overcommit requests...
1961 * they have priority over normal requests
1962 * within a given priority level
1963 */
1964 wq->wq_reqcount--;
1965 wq->wq_requests[priclass]--;
1966 wq->wq_ocrequests[priclass]--;
1967
1968 oc_prio = pthread_priority_from_class_index(priclass);
1969 overcommit = TRUE;
1970
1971 goto dispatch_overcommit;
1972 }
1973 }
1974 /*
1975 * if we get here, the work should be handled by a constrained thread
1976 */
1977 if (wq->wq_reqcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
1978 /*
1979 * no work to do, or we're already at or over the scheduling limit for
1980 * constrained threads... just return or park the thread...
1981 * do not start the timer for this condition... if we don't have any work,
1982 * we'll check again when new work arrives... if we're over the limit, we need 1 or more
1983 * constrained threads to return to the kernel before we can dispatch additional work
1984 */
1985 if ((th_to_park = thread) == THREAD_NULL)
1986 goto out_of_work;
1987 goto parkit;
1988 }
1989
1990 thactive_count = 0;
1991 busycount = 0;
1992
1993 curtime = mach_absolute_time();
1994
1995 thactive_count += wq->wq_thactive_count[priclass];
1996
1997 if (wq->wq_thscheduled_count[priclass]) {
1998 if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[priclass])) {
1999 busycount++;
2000 }
2001 }
2002
2003 if (thread != THREAD_NULL) {
2004 if (tl->th_priority == priclass) {
2005 /*
2006 * don't count this thread as currently active
2007 */
2008 thactive_count--;
2009 }
2010 }
2011 if (thactive_count + busycount >= wq->wq_max_concurrency) {
2012 if (busycount) {
2013 /*
2014 * we found at least 1 thread in the
2015 * 'busy' state... make sure we start
2016 * the timer because if they are the only
2017 * threads keeping us from scheduling
2018 * this work request, we won't get a callback
2019 * to kick off the timer... we need to
2020 * start it now...
2021 */
2022 WQ_TIMER_NEEDED(wq, start_timer);
2023 }
2024
2025 PTHREAD_TRACE(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, (start_timer ? 1<<7 : 0) | pthread_priority_from_class_index(priclass), thactive_count, busycount, 0);
2026
2027 if ((th_to_park = thread) == THREAD_NULL) {
2028 goto out_of_work;
2029 }
2030
2031 goto parkit;
2032 }
2033
2034 if (thread != THREAD_NULL) {
2035 /*
2036 * thread is non-NULL here when we return from userspace
2037 * in workq_kernreturn; rather than trying to find a thread,
2038 * we pick up new work for this specific thread.
2039 */
2040 th_to_run = thread;
2041 goto pick_up_work;
2042 }
2043
2044 grab_idle_thread:
2045 if (wq->wq_thidlecount == 0) {
2046 /*
2047 * we have no additional threads waiting to pick up
2048 * work; however, there is additional work to do.
2049 */
2050 WQ_TIMER_NEEDED(wq, start_timer);
2051
2052 PTHREAD_TRACE(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0);
2053
2054 goto no_thread_to_run;
2055 }
2056
2057 /*
2058 * we already know there is both work available
2059 * and an idle thread, so activate a thread and then
2060 * fall into the code that pulls a new work request...
2061 */
2062 tl = TAILQ_FIRST(&wq->wq_thidlelist);
2063 TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
2064 wq->wq_thidlecount--;
2065
2066 TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
2067
2068 if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
2069 tl->th_flags &= ~TH_LIST_SUSPENDED;
2070 reuse_thread = 0;
2071
2072 } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
2073 tl->th_flags &= ~TH_LIST_BLOCKED;
2074 wake_thread = 1;
2075 }
2076 tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
2077
2078 wq->wq_threads_scheduled++;
2079 wq->wq_thscheduled_count[priclass]++;
2080 OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
2081
2082 adjust_counters = FALSE;
2083 th_to_run = tl->th_thread;
2084
2085 pick_up_work:
2086 if (!overcommit && !force_oc) {
2087 wq->wq_reqcount--;
2088 wq->wq_requests[priclass]--;
2089
2090 if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
2091 wq->wq_constrained_threads_scheduled++;
2092 tl->th_flags |= TH_LIST_CONSTRAINED;
2093 }
2094 } else {
2095 if (tl->th_flags & TH_LIST_CONSTRAINED) {
2096 wq->wq_constrained_threads_scheduled--;
2097 tl->th_flags &= ~TH_LIST_CONSTRAINED;
2098 }
2099 }
2100
2101 orig_class = tl->th_priority;
2102 tl->th_priority = (uint8_t)priclass;
2103
2104 if (adjust_counters && (orig_class != priclass)) {
2105 /*
2106 * we need to adjust these counters based on this
2107 * thread's new disposition with respect to priority
2108 */
2109 OSAddAtomic(-1, &wq->wq_thactive_count[orig_class]);
2110 OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
2111
2112 wq->wq_thscheduled_count[orig_class]--;
2113 wq->wq_thscheduled_count[priclass]++;
2114 }
2115 wq->wq_thread_yielded_count = 0;
2116
2117 workqueue_unlock(p);
2118
2119 if (orig_class != priclass) {
2120 pthread_priority_t pri = pthread_priority_from_class_index(priclass);
2121
2122 thread_qos_policy_data_t qosinfo;
2123
2124 /* Set the QoS tier on the thread, along with the ceiling of max importance for this class. */
2125 qosinfo.qos_tier = pthread_priority_get_qos_class(pri);
2126 qosinfo.tier_importance = 0;
2127
2128 PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(orig_class), 0, 0);
2129
2130 /* All the previous implementation here now boils down to setting the QoS policy on the thread. */
2131 pthread_kern->thread_policy_set_internal(th_to_run, THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, THREAD_QOS_POLICY_COUNT);
2132
2133 PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(priclass), qosinfo.qos_tier, 0);
2134 }
2135
2136 /*
2137 * if the current thread is reused for the work request, wq_runreq does not return via unix_syscall
2138 */
2139 wq_runreq(p, overcommit, pthread_priority_from_class_index(priclass), th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
2140
2141 PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0);
2142
2143 return (TRUE);
2144
2145 out_of_work:
2146 /*
2147 * we have no work to do or we are fully booked
2148 * with respect to running threads...
2149 */
2150 no_thread_to_run:
2151 workqueue_unlock(p);
2152
2153 if (start_timer)
2154 workqueue_interval_timer_start(wq);
2155
2156 PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(thread), start_timer, 2, 0);
2157
2158 return (FALSE);
2159
2160 parkit:
2161 /*
2162 * this is a workqueue thread with no more
2163 * work to do... park it for now
2164 */
2165 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
2166 tl->th_flags &= ~TH_LIST_RUNNING;
2167
2168 tl->th_flags |= TH_LIST_BLOCKED;
2169 TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
2170
2171 pthread_kern->thread_sched_call(th_to_park, NULL);
2172
2173 OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
2174 wq->wq_thscheduled_count[tl->th_priority]--;
2175 wq->wq_threads_scheduled--;
2176
2177 if (tl->th_flags & TH_LIST_CONSTRAINED) {
2178 wq->wq_constrained_threads_scheduled--;
2179 wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
2180 tl->th_flags &= ~TH_LIST_CONSTRAINED;
2181 }
2182 if (wq->wq_thidlecount < 100)
2183 us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
2184 else
2185 us_to_wait = wq_reduce_pool_window_usecs / 100;
2186
2187 wq->wq_thidlecount++;
2188 wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
2189
2190 assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
2191 TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
2192 wq_reduce_pool_window_usecs, NSEC_PER_USEC);
2193
2194 workqueue_unlock(p);
2195
2196 if (start_timer)
2197 workqueue_interval_timer_start(wq);
2198
2199 PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park));
2200 PTHREAD_TRACE(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0);
2201
2202 thread_block((thread_continue_t)wq_unpark_continue);
2203 /* NOT REACHED */
2204
2205 return (FALSE);
2206 }
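/*
 * Illustrative sketch (not part of this file): the idle-pool decay used in
 * the 'parkit' path above.  The park timeout shrinks linearly as the idle
 * pool grows, flooring at window/100.  Assuming a 5,000,000us window (the
 * shipped default may differ): 1 idle thread waits ~4,950,000us, 50 idle
 * threads wait ~2,500,000us.
 */
#include <stdint.h>

static uint32_t
sketch_park_timeout_usecs(uint32_t idlecount, uint32_t window_usecs)
{
	if (idlecount < 100)
		return window_usecs - (idlecount * (window_usecs / 100));
	return window_usecs / 100;
}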
2207
2208
2209 static void
2210 wq_unsuspend_continue(void)
2211 {
2212 struct uthread *uth = NULL;
2213 thread_t th_to_unsuspend;
2214 struct threadlist *tl;
2215 proc_t p;
2216
2217 th_to_unsuspend = current_thread();
2218 uth = pthread_kern->get_bsdthread_info(th_to_unsuspend);
2219
2220 if (uth != NULL && (tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {
2221
2222 if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
2223 /*
2224 * most likely a normal resume of this thread occurred...
2225 * it's also possible that the thread was aborted after we
2226 * finished setting it up so that it could be dispatched... if
2227 * so, thread_bootstrap_return will notice the abort and put
2228 * the thread on the path to self-destruction
2229 */
2230 normal_resume_to_user:
2231 pthread_kern->thread_sched_call(th_to_unsuspend, workqueue_callback);
2232 pthread_kern->thread_bootstrap_return();
2233 }
2234 /*
2235 * if we get here, it's because we've been resumed due to
2236 * an abort of this thread (process is crashing)
2237 */
2238 p = current_proc();
2239
2240 workqueue_lock_spin(p);
2241
2242 if (tl->th_flags & TH_LIST_SUSPENDED) {
2243 /*
2244 * thread has been aborted while still on our idle
2245 * queue... remove it from our domain...
2246 * workqueue_removethread consumes the lock
2247 */
2248 workqueue_removethread(tl, 0);
2249 pthread_kern->thread_bootstrap_return();
2250 }
2251 while ((tl->th_flags & TH_LIST_BUSY)) {
2252 /*
2253 * this thread was aborted after we started making
2254 * it runnable, but before we finished dispatching it...
2255 * we need to wait for that process to finish,
2256 * and we need to ask for a wakeup instead of a
2257 * thread_resume since the abort has already resumed us
2258 */
2259 tl->th_flags |= TH_LIST_NEED_WAKEUP;
2260
2261 assert_wait((caddr_t)tl, (THREAD_UNINT));
2262
2263 workqueue_unlock(p);
2264 thread_block(THREAD_CONTINUE_NULL);
2265 workqueue_lock_spin(p);
2266 }
2267 workqueue_unlock(p);
2268 /*
2269 * we have finished setting up the thread's context...
2270 * thread_bootstrap_return will take us through the abort path
2271 * where the thread will self destruct
2272 */
2273 goto normal_resume_to_user;
2274 }
2275 pthread_kern->thread_bootstrap_return();
2276 }
2277
2278
2279 static void
2280 wq_unpark_continue(void)
2281 {
2282 struct uthread *uth = NULL;
2283 struct threadlist *tl;
2284 thread_t th_to_unpark;
2285 proc_t p;
2286
2287 th_to_unpark = current_thread();
2288 uth = pthread_kern->get_bsdthread_info(th_to_unpark);
2289
2290 if (uth != NULL) {
2291 if ((tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {
2292
2293 if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
2294 /*
2295 * a normal wakeup of this thread occurred... no need
2296 * for any synchronization with the timer and wq_runreq
2297 */
2298 normal_return_to_user:
2299 pthread_kern->thread_sched_call(th_to_unpark, workqueue_callback);
2300
2301 PTHREAD_TRACE(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0);
2302
2303 pthread_kern->thread_exception_return();
2304 }
2305 p = current_proc();
2306
2307 workqueue_lock_spin(p);
2308
2309 if ( !(tl->th_flags & TH_LIST_RUNNING)) {
2310 /*
2311 * the timer popped us out and we've not
2312 * been moved off of the idle list
2313 * so we should now self-destruct
2314 *
2315 * workqueue_removethread consumes the lock
2316 */
2317 workqueue_removethread(tl, 0);
2318 pthread_kern->thread_exception_return();
2319 }
2320 /*
2321 * the timer woke us up, but we have already
2322 * started to make this a runnable thread,
2323 * but have not yet finished that process...
2324 * so wait for the normal wakeup
2325 */
2326 while ((tl->th_flags & TH_LIST_BUSY)) {
2327
2328 assert_wait((caddr_t)tl, (THREAD_UNINT));
2329
2330 workqueue_unlock(p);
2331
2332 thread_block(THREAD_CONTINUE_NULL);
2333
2334 workqueue_lock_spin(p);
2335 }
2336 /*
2337 * we have finished setting up the thread's context
2338 * now we can return as if we got a normal wakeup
2339 */
2340 workqueue_unlock(p);
2341
2342 goto normal_return_to_user;
2343 }
2344 }
2345 pthread_kern->thread_exception_return();
2346 }
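/*
 * Illustrative sketch (not part of this file): the wait-until-BUSY-clears
 * loop used above (and in wq_unsuspend_continue), restated with a pthread
 * mutex/condvar standing in for workqueue_lock_spin()/assert_wait()/
 * thread_block().  SKETCH_TH_LIST_BUSY is a stand-in bit.
 */
#include <pthread.h>
#include <stdint.h>

#define SKETCH_TH_LIST_BUSY 0x2u

static void
sketch_wait_until_not_busy(pthread_mutex_t *lock, pthread_cond_t *cv,
    uint32_t *flags)
{
	pthread_mutex_lock(lock);
	while (*flags & SKETCH_TH_LIST_BUSY) {
		/* the waker clears the bit and signals 'cv' */
		pthread_cond_wait(cv, lock);
	}
	pthread_mutex_unlock(lock);
}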
2347
2348
2349
2350 static void
2351 wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
2352 int reuse_thread, int wake_thread, int return_directly)
2353 {
2354 int ret = 0;
2355 boolean_t need_resume = FALSE;
2356
2357 PTHREAD_TRACE1(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, overcommit, priority, thread_tid(current_thread()), thread_tid(th));
2358
2359 ret = _setup_wqthread(p, th, overcommit, priority, reuse_thread, tl);
2360
2361 if (ret != 0)
2362 panic("setup_wqthread failed %x\n", ret);
2363
2364 if (return_directly) {
2365 PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);
2366
2367 pthread_kern->thread_exception_return();
2368 panic("wq_runreq: thread_exception_return returned ...\n");
2369 }
2370 if (wake_thread) {
2371 workqueue_lock_spin(p);
2372
2373 tl->th_flags &= ~TH_LIST_BUSY;
2374 wakeup(tl);
2375
2376 workqueue_unlock(p);
2377 } else {
2378 PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
2379
2380 workqueue_lock_spin(p);
2381
2382 if (tl->th_flags & TH_LIST_NEED_WAKEUP) {
2383 wakeup(tl);
2384 } else {
2385 need_resume = TRUE;
2386 }
2387
2388 tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);
2389
2390 workqueue_unlock(p);
2391
2392 if (need_resume) {
2393 /*
2394 * need to do this outside of the workqueue spin lock
2395 * since thread_resume locks the thread via a full mutex
2396 */
2397 pthread_kern->thread_resume(th);
2398 }
2399 }
2400 }
2401
2402
2403 int
2404 _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl)
2405 {
2406 uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI;
2407 mach_vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
2408 int error = 0;
2409
2410 if (overcommit) {
2411 flags |= WQ_FLAG_THREAD_OVERCOMMIT;
2412 }
2413
2414 /* Put the QoS class value into the lower bits of the reuse_thread register; this is where
2415 * the thread priority used to be stored anyway.
2416 */
2417 flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);
2418
2419 #if defined(__i386__) || defined(__x86_64__)
2420 int isLP64 = proc_is64bit(p);
2421
2422 /*
2423 * Set up i386 registers & function call.
2424 */
2425 if (isLP64 == 0) {
2426 x86_thread_state32_t state;
2427 x86_thread_state32_t *ts = &state;
2428
2429 ts->eip = (unsigned int)pthread_kern->proc_get_wqthread(p);
2430 ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
2431 ts->ebx = (unsigned int)tl->th_thport;
2432 ts->ecx = (unsigned int)(tl->th_stackaddr + guardsize);
2433 ts->edx = (unsigned int)0;
2434 ts->edi = (unsigned int)flags;
2435 ts->esi = (unsigned int)0;
2436 /*
2437 * set stack pointer
2438 */
2439 ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_32_STK_ALIGN));
2440
2441 (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
2442
2443 } else {
2444 x86_thread_state64_t state64;
2445 x86_thread_state64_t *ts64 = &state64;
2446
2447 ts64->rip = (uint64_t)pthread_kern->proc_get_wqthread(p);
2448 ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
2449 ts64->rsi = (uint64_t)(tl->th_thport);
2450 ts64->rdx = (uint64_t)(tl->th_stackaddr + guardsize);
2451 ts64->rcx = (uint64_t)0;
2452 ts64->r8 = (uint64_t)flags;
2453 ts64->r9 = (uint64_t)0;
2454
2455 /*
2456 * set stack pointer aligned to 16 byte boundary
2457 */
2458 ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_64_REDZONE_LEN);
2459
2460 error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
2461 if (error != KERN_SUCCESS) {
2462 error = EINVAL;
2463 }
2464 }
2465 #else
2466 #error setup_wqthread not defined for this architecture
2467 #endif
2468
2469 return error;
2470 }
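/*
 * Illustrative sketch (not part of this file): the argument order implied by
 * the x86_64 register setup above (rdi, rsi, rdx, rcx, r8, r9).  This is a
 * hypothetical stub; the real entry point is declared in user-space
 * libpthread and may differ.
 */
#include <mach/mach.h>
#include <stdint.h>

static void
sketch_wqthread_entry(
	void        *stacktop,    /* rdi: th_stackaddr + stacksize + guardsize */
	mach_port_t  kport,       /* rsi: th_thport                            */
	void        *stacklow,    /* rdx: th_stackaddr + guardsize             */
	void        *unused1,     /* rcx: 0                                    */
	uint32_t     flags,       /* r8:  reuse/newspi/overcommit/QoS bits     */
	void        *unused2)     /* r9:  0                                    */
{
	(void)stacktop; (void)kport; (void)stacklow;
	(void)unused1;  (void)flags; (void)unused2;
	/* ... user-space workqueue thread bootstrap would run here ... */
}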
2471
2472 int
2473 _fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
2474 {
2475 struct workqueue * wq;
2476 int error = 0;
2477 int activecount;
2478 uint32_t pri;
2479
2480 workqueue_lock_spin(p);
2481 if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
2482 error = EINVAL;
2483 goto out;
2484 }
2485 activecount = 0;
2486
2487 for (pri = 0; pri < WORKQUEUE_NUM_BUCKETS; pri++) {
2488 activecount += wq->wq_thactive_count[pri];
2489 }
2490 pwqinfo->pwq_nthreads = wq->wq_nthreads;
2491 pwqinfo->pwq_runthreads = activecount;
2492 pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
2493 pwqinfo->pwq_state = 0;
2494
2495 if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT) {
2496 pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
2497 }
2498
2499 if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT) {
2500 pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
2501 }
2502
2503 out:
2504 workqueue_unlock(p);
2505 return(error);
2506 }
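/*
 * Illustrative user-space sketch (not part of this file): reading the
 * counters filled in above through libproc.  Assumes the PROC_PIDWORKQUEUEINFO
 * flavor and struct proc_workqueueinfo from <sys/proc_info.h>.
 */
#include <libproc.h>
#include <stdio.h>
#include <sys/proc_info.h>
#include <unistd.h>

static void
sketch_dump_wqinfo(pid_t pid)
{
	struct proc_workqueueinfo pwqinfo;
	int ret = proc_pidinfo(pid, PROC_PIDWORKQUEUEINFO, 0,
	    &pwqinfo, sizeof(pwqinfo));

	if (ret == (int)sizeof(pwqinfo)) {
		printf("threads=%u running=%u blocked=%u state=0x%x\n",
		    pwqinfo.pwq_nthreads, pwqinfo.pwq_runthreads,
		    pwqinfo.pwq_blockedthreads, pwqinfo.pwq_state);
	}
}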
2507
2508 int
2509 _thread_selfid(__unused struct proc *p, uint64_t *retval)
2510 {
2511 thread_t thread = current_thread();
2512 *retval = thread_tid(thread);
2513 return KERN_SUCCESS;
2514 }
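/*
 * Illustrative user-space sketch (not part of this file): the libpthread
 * wrapper that ultimately resolves to the per-thread ID returned above.
 * Passing NULL asks for the calling thread.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static void
sketch_print_tid(void)
{
	uint64_t tid = 0;

	if (pthread_threadid_np(NULL, &tid) == 0) {
		printf("thread id: %llu\n", (unsigned long long)tid);
	}
}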
2515
2516 void
2517 _pthread_init(void)
2518 {
2519 pthread_lck_grp_attr = lck_grp_attr_alloc_init();
2520 pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
2521
2522 /*
2523 * allocate the lock attribute for pthread synchronizers
2524 */
2525 pthread_lck_attr = lck_attr_alloc_init();
2526
2527 _workqueue_init_lock((proc_t)get_bsdtask_info(kernel_task));
2528 pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
2529
2530 pth_global_hashinit();
2531 psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
2532 psynch_zoneinit();
2533
2534 /*
2535 * register sysctls
2536 */
2537 sysctl_register_oid(&sysctl__kern_wq_yielded_threshold);
2538 sysctl_register_oid(&sysctl__kern_wq_yielded_window_usecs);
2539 sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
2540 sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
2541 sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
2542 sysctl_register_oid(&sysctl__kern_wq_max_threads);
2543 sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
2544 sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
2545 }
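/*
 * Illustrative user-space sketch (not part of this file): reading one of the
 * tunables registered above.  Assumes the OID surfaces as
 * "kern.wq_max_threads"; the exact name should be confirmed with sysctl -a.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/sysctl.h>

static void
sketch_read_wq_max_threads(void)
{
	uint32_t max_threads = 0;
	size_t len = sizeof(max_threads);

	if (sysctlbyname("kern.wq_max_threads", &max_threads, &len, NULL, 0) == 0) {
		printf("kern.wq_max_threads = %u\n", max_threads);
	}
}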