/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
#include <sys/sysproto.h>
#include <sys/pthread_internal.h>
#include <sys/user.h>		/* for coredump */
#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/affinity.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
#include <vm/vm_protos.h>
#include <vm/vm_map.h>		/* for current_map() */
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#include <i386/machine_routines.h>
#include <i386/eflags.h>

#include <libkern/OSAtomic.h>
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1

#if defined(__ppc__) || defined(__ppc64__)
#include <architecture/ppc/cframe.h>
#endif
lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;
lck_mtx_t	*pthread_list_mlock;

extern void pthread_init(void);
extern kern_return_t thread_getstatus(register thread_t act, int flavor,
			thread_state_t tstate, mach_msg_type_number_t *count);
extern kern_return_t thread_setstatus(thread_t thread, int flavor,
			thread_state_t tstate, mach_msg_type_number_t count);
extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item);
static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item);
static void workqueue_run_nextitem(proc_t p, thread_t th);
static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
		       int reuse_thread, int wake_thread, int return_directly);
static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
static int workqueue_addnewthread(struct workqueue *wq);
static void workqueue_removethread(struct workqueue *wq);
static void workqueue_lock(proc_t);
static void workqueue_lock_spin(proc_t);
static void workqueue_unlock(proc_t);
#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128
#define TRUNC_DOWN32(a,c)	((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
#define TRUNC_DOWN64(a,c)	((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
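/*
 * Illustrative note (added; not in the original source): the TRUNC_DOWN
 * macros subtract 'c' from an address and round the result down to a
 * multiple of 'c'.  For example, TRUNC_DOWN32(0x1008, 16) == 0x0ff0.
 * This is the usual way of reserving at least 'c' bytes below an address
 * while keeping the result 'c'-byte aligned, as is done for stack pointers.
 */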
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
#define PTHREAD_START_CUSTOM		0x01000000
#define PTHREAD_START_SETSCHED		0x02000000
#define PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xffff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff

#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR
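/*
 * Example (added for illustration; not in the original source): per the
 * layout pictured above, a user-space caller that wants a round-robin
 * policy at importance 5 would hand bsdthread_create a flags word built as
 *
 *     flags = PTHREAD_START_SETSCHED |
 *             (SCHED_RR << PTHREAD_START_POLICY_BITSHIFT) |
 *             (5 & PTHREAD_START_IMPORTANCE_MASK);
 *
 * bsdthread_create() below unpacks the same word with
 * PTHREAD_START_POLICY_BITSHIFT / PTHREAD_START_POLICY_MASK and
 * PTHREAD_START_IMPORTANCE_MASK before calling thread_policy_set().
 */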
void
pthread_init(void)
{
    pthread_lck_grp_attr = lck_grp_attr_alloc_init();
    pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

    /*
     * allocate the lock attribute for pthread synchronizers
     */
    pthread_lck_attr = lck_attr_alloc_init();

    pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
}

void
pthread_list_lock(void)
{
    lck_mtx_lock(pthread_list_mlock);
}

void
pthread_list_unlock(void)
{
    lck_mtx_unlock(pthread_list_mlock);
}
int
__pthread_mutex_destroy(__unused struct proc *p, struct __pthread_mutex_destroy_args *uap, __unused register_t *retval)
{
    int res;
    int mutexid = uap->mutexid;
    pthread_mutex_t *mutex;
    lck_mtx_t *lmtx;
    lck_mtx_t *lmtx1;

    mutex = pthread_id_to_mutex(mutexid);
    if (mutex == 0)
        return(EINVAL);

    MTX_LOCK(mutex->lock);
    if (mutex->sig == _PTHREAD_KERN_MUTEX_SIG)
    {
        if (mutex->owner == (thread_t)NULL &&
            mutex->refcount == 1)
        {
            mutex->sig = _PTHREAD_NO_SIG;
            lmtx = mutex->mutex;
            lmtx1 = mutex->lock;
            pthread_id_mutex_remove(mutexid);
            MTX_UNLOCK(mutex->lock);
            lck_mtx_free(lmtx, pthread_lck_grp);
            lck_mtx_free(lmtx1, pthread_lck_grp);
            kfree((void *)mutex, sizeof(struct _pthread_mutex));
            return(0);
        } else
            res = EBUSY;
    } else
        res = EINVAL;

    MTX_UNLOCK(mutex->lock);
    pthread_mutex_release(mutex);
    return (res);
}
/*
 * Initialize a mutex variable, possibly with additional attributes.
 */
static void
pthread_mutex_init_internal(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
{
    mutex->prioceiling = attr->prioceiling;
    mutex->protocol = attr->protocol;
    mutex->type = attr->type;
    mutex->pshared = attr->pshared;
    mutex->refcount = 0;
    mutex->owner = (thread_t)NULL;
    mutex->owner_proc = current_proc();
    mutex->sig = _PTHREAD_KERN_MUTEX_SIG;
    mutex->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
    mutex->mutex = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
}
/*
 * Initialize a mutex variable, possibly with additional attributes.
 * Public interface - so don't trust the lock - initialize it first.
 */
int
__pthread_mutex_init(__unused struct proc *p, struct __pthread_mutex_init_args *uap, __unused register_t *retval)
{
    user_addr_t umutex = uap->mutex;
    pthread_mutex_t *mutex;
    user_addr_t uattr = uap->attr;
    pthread_mutexattr_t attr;
    unsigned int addr = (unsigned int)((uintptr_t)uap->mutex);
    int pmutex_sig;
    int mutexid;
    int error = 0;

    if ((umutex == 0) || (uattr == 0))
        return(EINVAL);

    if ((error = copyin(uattr, &attr, sizeof(pthread_mutexattr_t))))
        return(error);

    if (attr.sig != _PTHREAD_MUTEX_ATTR_SIG)
        return (EINVAL);

    if ((error = copyin(umutex, &pmutex_sig, sizeof(int))))
        return(error);

    if (pmutex_sig == _PTHREAD_KERN_MUTEX_SIG)
        return(EBUSY);
    mutex = (pthread_mutex_t *)kalloc(sizeof(pthread_mutex_t));

    pthread_mutex_init_internal(mutex, &attr);

    mutexid = pthread_id_mutex_add(mutex);
    if (mutexid) {
        if ((error = copyout(&mutexid, ((user_addr_t)((uintptr_t)(addr))), 4)))
            goto cleanup;
        return(0);
    } else
        error = ENOMEM;
cleanup:
    if (mutexid)
        pthread_id_mutex_remove(mutexid);
    lck_mtx_free(mutex->lock, pthread_lck_grp);
    lck_mtx_free(mutex->mutex, pthread_lck_grp);
    kfree(mutex, sizeof(struct _pthread_mutex));
    return(error);
}
/*
 * TODO: Priority inheritance stuff
 */
int
__pthread_mutex_lock(struct proc *p, struct __pthread_mutex_lock_args *uap, __unused register_t *retval)
{
    int mutexid = uap->mutexid;
    pthread_mutex_t *mutex;
    int error;

    mutex = pthread_id_to_mutex(mutexid);
    if (mutex == 0)
        return(EINVAL);

    MTX_LOCK(mutex->lock);

    if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
    {
        error = EINVAL;
        goto out;
    }

    if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
        error = EINVAL;
        goto out;
    }

    MTX_UNLOCK(mutex->lock);

    lck_mtx_lock(mutex->mutex);

    MTX_LOCK(mutex->lock);
    mutex->owner = current_thread();
    error = 0;
out:
    MTX_UNLOCK(mutex->lock);
    pthread_mutex_release(mutex);
    return(error);
}
/*
 * Attempt to lock a mutex, but don't block if this isn't possible.
 */
int
__pthread_mutex_trylock(struct proc *p, struct __pthread_mutex_trylock_args *uap, __unused register_t *retval)
{
    int mutexid = uap->mutexid;
    pthread_mutex_t *mutex;
    boolean_t state;
    int error;

    mutex = pthread_id_to_mutex(mutexid);
    if (mutex == 0)
        return(EINVAL);

    MTX_LOCK(mutex->lock);

    if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
    {
        error = EINVAL;
        goto out;
    }

    if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
        error = EINVAL;
        goto out;
    }

    MTX_UNLOCK(mutex->lock);

    state = lck_mtx_try_lock(mutex->mutex);
    if (state) {
        MTX_LOCK(mutex->lock);
        mutex->owner = current_thread();
        MTX_UNLOCK(mutex->lock);
        error = 0;
    } else
        error = EBUSY;

    pthread_mutex_release(mutex);
    return(error);
out:
    MTX_UNLOCK(mutex->lock);
    pthread_mutex_release(mutex);
    return(error);
}
/*
 * TODO: Priority inheritance stuff
 */
int
__pthread_mutex_unlock(struct proc *p, struct __pthread_mutex_unlock_args *uap, __unused register_t *retval)
{
    int mutexid = uap->mutexid;
    pthread_mutex_t *mutex;
    int error;

    mutex = pthread_id_to_mutex(mutexid);
    if (mutex == 0)
        return(EINVAL);

    MTX_LOCK(mutex->lock);

    if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
    {
        error = EINVAL;
        goto out;
    }

    if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
        error = EINVAL;
        goto out;
    }

    MTX_UNLOCK(mutex->lock);

    lck_mtx_unlock(mutex->mutex);

    MTX_LOCK(mutex->lock);
    mutex->owner = (thread_t)NULL;
    error = 0;
out:
    MTX_UNLOCK(mutex->lock);
    pthread_mutex_release(mutex);
    return(error);
}
int
__pthread_cond_init(__unused struct proc *p, struct __pthread_cond_init_args *uap, __unused register_t *retval)
{
    pthread_cond_t *cond;
    pthread_condattr_t attr;
    user_addr_t ucond = uap->cond;
    user_addr_t uattr = uap->attr;
    unsigned int addr = (unsigned int)((uintptr_t)uap->cond);
    int condid, error, cond_sig;
    semaphore_t sem;
    kern_return_t kret;
    int value = 0;

    if ((ucond == 0) || (uattr == 0))
        return(EINVAL);

    if ((error = copyin(uattr, &attr, sizeof(pthread_condattr_t))))
        return(error);

    if (attr.sig != _PTHREAD_COND_ATTR_SIG)
        return (EINVAL);

    if ((error = copyin(ucond, &cond_sig, sizeof(int))))
        return(error);

    if (cond_sig == _PTHREAD_KERN_COND_SIG)
        return(EBUSY);
    kret = semaphore_create(kernel_task, &sem, SYNC_POLICY_FIFO, value);
    if (kret != KERN_SUCCESS)
        return(ENOMEM);

    cond = (pthread_cond_t *)kalloc(sizeof(pthread_cond_t));

    cond->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
    cond->pshared = attr.pshared;
    cond->sig = _PTHREAD_KERN_COND_SIG;
    cond->sigpending = 0;
    cond->sem = sem;
    cond->mutex = (pthread_mutex_t *)0;
    cond->owner_proc = current_proc();

    condid = pthread_id_cond_add(cond);
    if (condid) {
        if ((error = copyout(&condid, ((user_addr_t)((uintptr_t)(addr))), 4)))
            goto cleanup;
        return(0);
    } else
        error = ENOMEM;
cleanup:
    if (condid)
        pthread_id_cond_remove(condid);
    semaphore_destroy(kernel_task, cond->sem);
    kfree(cond, sizeof(pthread_cond_t));
    return(error);
}
/*
 * Destroy a condition variable.
 */
int
__pthread_cond_destroy(__unused struct proc *p, struct __pthread_cond_destroy_args *uap, __unused register_t *retval)
{
    pthread_cond_t *cond;
    int condid = uap->condid;
    semaphore_t sem;
    lck_mtx_t *lmtx;
    int res;

    cond = pthread_id_to_cond(condid);
    if (cond == 0)
        return(EINVAL);

    COND_LOCK(cond->lock);
    if (cond->sig == _PTHREAD_KERN_COND_SIG)
    {
        if (cond->refcount == 1)
        {
            cond->sig = _PTHREAD_NO_SIG;
            sem = cond->sem;
            lmtx = cond->lock;
            pthread_id_cond_remove(condid);
            COND_UNLOCK(cond->lock);
            lck_mtx_free(lmtx, pthread_lck_grp);
            (void)semaphore_destroy(kernel_task, sem);
            kfree((void *)cond, sizeof(pthread_cond_t));
            return(0);
        } else
            res = EBUSY;
    } else
        res = EINVAL;

    COND_UNLOCK(cond->lock);
    pthread_cond_release(cond);
    return (res);
}
/*
 * Signal a condition variable, waking up all threads waiting for it.
 */
int
__pthread_cond_broadcast(__unused struct proc *p, struct __pthread_cond_broadcast_args *uap, __unused register_t *retval)
{
    int condid = uap->condid;
    pthread_cond_t *cond;
    int error;
    kern_return_t kret;

    cond = pthread_id_to_cond(condid);
    if (cond == 0)
        return(EINVAL);

    COND_LOCK(cond->lock);

    if (cond->sig != _PTHREAD_KERN_COND_SIG)
    {
        error = EINVAL;
        goto out;
    }

    if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
        error = EINVAL;
        goto out;
    }

    COND_UNLOCK(cond->lock);

    kret = semaphore_signal_all(cond->sem);
    switch (kret) {
    case KERN_INVALID_ADDRESS:
    case KERN_PROTECTION_FAILURE:
        error = EINVAL;
        break;
    case KERN_ABORTED:
    case KERN_OPERATION_TIMED_OUT:
        error = EINTR;
        break;
    case KERN_SUCCESS:
        error = 0;
        break;
    default:
        error = EINVAL;
        break;
    }

    COND_LOCK(cond->lock);
out:
    COND_UNLOCK(cond->lock);
    pthread_cond_release(cond);
    return (error);
}
/*
 * Signal a condition variable, waking only one thread.
 */
int
__pthread_cond_signal(__unused struct proc *p, struct __pthread_cond_signal_args *uap, __unused register_t *retval)
{
    int condid = uap->condid;
    pthread_cond_t *cond;
    int error;
    kern_return_t kret;

    cond = pthread_id_to_cond(condid);
    if (cond == 0)
        return(EINVAL);

    COND_LOCK(cond->lock);

    if (cond->sig != _PTHREAD_KERN_COND_SIG)
    {
        error = EINVAL;
        goto out;
    }

    if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
        error = EINVAL;
        goto out;
    }

    COND_UNLOCK(cond->lock);

    kret = semaphore_signal(cond->sem);
    switch (kret) {
    case KERN_INVALID_ADDRESS:
    case KERN_PROTECTION_FAILURE:
        error = EINVAL;
        break;
    case KERN_ABORTED:
    case KERN_OPERATION_TIMED_OUT:
        error = EINTR;
        break;
    case KERN_SUCCESS:
        error = 0;
        break;
    default:
        error = EINVAL;
        break;
    }

    COND_LOCK(cond->lock);
out:
    COND_UNLOCK(cond->lock);
    pthread_cond_release(cond);
    return (error);
}
int
__pthread_cond_wait(__unused struct proc *p, struct __pthread_cond_wait_args *uap, __unused register_t *retval)
{
    int condid = uap->condid;
    pthread_cond_t *cond;
    int mutexid = uap->mutexid;
    pthread_mutex_t *mutex;
    int error;
    kern_return_t kret;

    cond = pthread_id_to_cond(condid);
    if (cond == 0)
        return(EINVAL);

    mutex = pthread_id_to_mutex(mutexid);
    if (mutex == 0) {
        pthread_cond_release(cond);
        return(EINVAL);
    }
    COND_LOCK(cond->lock);

    if (cond->sig != _PTHREAD_KERN_COND_SIG)
    {
        error = EINVAL;
        goto out;
    }

    if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
        error = EINVAL;
        goto out;
    }

    COND_UNLOCK(cond->lock);

    kret = semaphore_wait(cond->sem);
    switch (kret) {
    case KERN_INVALID_ADDRESS:
    case KERN_PROTECTION_FAILURE:
        error = EINVAL;
        break;
    case KERN_ABORTED:
    case KERN_OPERATION_TIMED_OUT:
        error = EINTR;
        break;
    case KERN_SUCCESS:
        error = 0;
        break;
    default:
        error = EINVAL;
        break;
    }

    COND_LOCK(cond->lock);
out:
    COND_UNLOCK(cond->lock);
    pthread_cond_release(cond);
    pthread_mutex_release(mutex);
    return (error);
}
int
__pthread_cond_timedwait(__unused struct proc *p, struct __pthread_cond_timedwait_args *uap, __unused register_t *retval)
{
    int condid = uap->condid;
    pthread_cond_t *cond;
    int mutexid = uap->mutexid;
    pthread_mutex_t *mutex;
    mach_timespec_t absts;
    int error;
    kern_return_t kret;

    if ((error = copyin(uap->abstime, &absts, sizeof(mach_timespec_t))))
        return(error);
    cond = pthread_id_to_cond(condid);
    if (cond == 0)
        return(EINVAL);

    mutex = pthread_id_to_mutex(mutexid);
    if (mutex == 0) {
        pthread_cond_release(cond);
        return(EINVAL);
    }
    COND_LOCK(cond->lock);

    if (cond->sig != _PTHREAD_KERN_COND_SIG)
    {
        error = EINVAL;
        goto out;
    }

    if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
        error = EINVAL;
        goto out;
    }

    COND_UNLOCK(cond->lock);

    kret = semaphore_timedwait(cond->sem, absts);
    switch (kret) {
    case KERN_INVALID_ADDRESS:
    case KERN_PROTECTION_FAILURE:
        error = EINVAL;
        break;
    case KERN_ABORTED:
        error = EINTR;
        break;
    case KERN_OPERATION_TIMED_OUT:
        error = ETIMEDOUT;
        break;
    case KERN_SUCCESS:
        error = 0;
        break;
    default:
        error = EINVAL;
        break;
    }

    COND_LOCK(cond->lock);
out:
    COND_UNLOCK(cond->lock);
    pthread_cond_release(cond);
    pthread_mutex_release(mutex);
    return (error);
}
int
bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval)
{
    kern_return_t kret;
    void *sright;
    int error = 0;
    int allocated = 0;
    mach_vm_offset_t stackaddr;
    mach_vm_size_t th_allocsize = 0;
    mach_vm_size_t user_stacksize;
    mach_vm_size_t th_stacksize;
    mach_vm_offset_t th_stackaddr;
    mach_vm_offset_t th_stack;
    mach_vm_offset_t th_pthread;
    mach_port_t th_thport;
    thread_t th;
    user_addr_t user_func = uap->func;
    user_addr_t user_funcarg = uap->func_arg;
    user_addr_t user_stack = uap->stack;
    user_addr_t user_pthread = uap->pthread;
    unsigned int flags = (unsigned int)uap->flags;
    vm_map_t vmap = current_map();
    task_t ctask = current_task();
    unsigned int policy, importance;
    int isLP64 = 0;
    KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0);

    isLP64 = IS_64BIT_PROCESS(p);

#if defined(__ppc__)
    stackaddr = 0xF0000000;
#elif defined(__i386__)
    stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif
    kret = thread_create(ctask, &th);
    if (kret != KERN_SUCCESS)
        return(ENOMEM);
    thread_reference(th);

    sright = (void *) convert_thread_to_port(th);
    th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(ctask));

    if ((flags & PTHREAD_START_CUSTOM) == 0) {
        th_stacksize = (mach_vm_size_t)user_stack;	/* if it is custom then it is stacksize */
        th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

        kret = mach_vm_map(vmap, &stackaddr,
                           th_allocsize,
                           page_size - 1,
                           VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
                           0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
                           VM_INHERIT_DEFAULT);
        if (kret != KERN_SUCCESS)
            kret = mach_vm_allocate(vmap,
                                    &stackaddr, th_allocsize,
                                    VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
        if (kret != KERN_SUCCESS) {
            error = ENOMEM;
            goto out;
        }
        KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

        allocated = 1;
        th_stackaddr = stackaddr;

        /*
         * The guard page is at the lowest address
         * The stack base is the highest address
         */
        kret = mach_vm_protect(vmap, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

        if (kret != KERN_SUCCESS) {
            error = ENOMEM;
            goto out1;
        }
        th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
        th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
        user_stacksize = th_stacksize;
    } else {
        th_stack = user_stack;
        user_stacksize = user_stack;
        th_pthread = user_pthread;
        KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, 0, 0, 0, 3, 0);
    }
#if defined(__ppc__)
    /*
     * Set up PowerPC registers...
     * internally they are always kept as 64 bit and
     * since the register set is the same between 32 and 64bit modes
     * we don't need 2 different methods for setting the state
     */
    {
        ppc_thread_state64_t state64;
        ppc_thread_state64_t *ts64 = &state64;

        ts64->srr0 = (uint64_t)p->p_threadstart;
        ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE);
        ts64->r3 = (uint64_t)th_pthread;
        ts64->r4 = (uint64_t)((unsigned int)th_thport);
        ts64->r5 = (uint64_t)user_func;
        ts64->r6 = (uint64_t)user_funcarg;
        ts64->r7 = (uint64_t)user_stacksize;
        ts64->r8 = (uint64_t)uap->flags;

        thread_set_wq_state64(th, (thread_state_t)ts64);

        thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64);
    }
#elif defined(__i386__)
    {
    /*
     * Set up i386 registers & function call.
     */
    if (isLP64 == 0) {
        x86_thread_state32_t state;
        x86_thread_state32_t *ts = &state;

        ts->eip = (int)p->p_threadstart;
        ts->eax = (unsigned int)th_pthread;
        ts->ebx = (unsigned int)th_thport;
        ts->ecx = (unsigned int)user_func;
        ts->edx = (unsigned int)user_funcarg;
        ts->edi = (unsigned int)user_stacksize;
        ts->esi = (unsigned int)uap->flags;

        ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));

        thread_set_wq_state32(th, (thread_state_t)ts);

    } else {
        x86_thread_state64_t state64;
        x86_thread_state64_t *ts64 = &state64;

        ts64->rip = (uint64_t)p->p_threadstart;
        ts64->rdi = (uint64_t)th_pthread;
        ts64->rsi = (uint64_t)((unsigned int)th_thport);
        ts64->rdx = (uint64_t)user_func;
        ts64->rcx = (uint64_t)user_funcarg;
        ts64->r8 = (uint64_t)user_stacksize;
        ts64->r9 = (uint64_t)uap->flags;
        /*
         * set stack pointer aligned to 16 byte boundary
         */
        ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);

        thread_set_wq_state64(th, (thread_state_t)ts64);
    }
    }
#else
#error bsdthread_create not defined for this architecture
#endif
    /* Set scheduling parameters if needed */
    if ((flags & PTHREAD_START_SETSCHED) != 0) {
        thread_extended_policy_data_t extinfo;
        thread_precedence_policy_data_t precedinfo;

        importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
        policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

        if (policy == SCHED_OTHER)
            extinfo.timeshare = 1;
        else
            extinfo.timeshare = 0;
        thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

        precedinfo.importance = importance;
        thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
    }

    kret = thread_resume(th);
    if (kret != KERN_SUCCESS) {
        error = EINVAL;
        goto out1;
    }
    thread_deallocate(th);	/* drop the creator reference */

    KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, (unsigned int)th_pthread, 0, 0, 0);

    *retval = th_pthread;

    return(0);

out1:
    if (allocated != 0)
        (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
out:
    (void)mach_port_deallocate(get_task_ipcspace(ctask), (mach_port_name_t)th_thport);
    (void)thread_terminate(th);
    (void)thread_deallocate(th);
    return(error);
}
int
bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused register_t *retval)
{
    mach_vm_offset_t freeaddr;
    mach_vm_size_t freesize;
    kern_return_t kret;
    mach_port_name_t kthport = (mach_port_name_t)uap->port;
    mach_port_name_t sem = (mach_port_name_t)uap->sem;

    freeaddr = (mach_vm_offset_t)uap->stackaddr;
    freesize = uap->freesize;

    KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, (unsigned int)freeaddr, (unsigned int)freesize, (unsigned int)kthport, 0xff, 0);

    if (sem != MACH_PORT_NULL) {
        kret = semaphore_signal_internal_trap(sem);
        if (kret != KERN_SUCCESS) {
            return(EINVAL);
        }
    }
    if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
        kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
        if (kret != KERN_SUCCESS) {
            return(EINVAL);
        }
    }

    (void) thread_terminate(current_thread());
    if (kthport != MACH_PORT_NULL)
        mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
    thread_exception_return();
    panic("bsdthread_terminate: still running\n");

    KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_END, 0, 0, 0, 0xff, 0);

    return(0);
}
int
bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused register_t *retval)
{
    /* syscall randomizer test can pass bogus values */
    if (uap->pthsize > MAX_PTHREAD_SIZE) {
        return(EINVAL);
    }
    p->p_threadstart = uap->threadstart;
    p->p_wqthread = uap->wqthread;
    p->p_pthsize = uap->pthsize;

    return(0);
}
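/*
 * Note (added; not in the original source): bsdthread_register() simply
 * records the user-space entry points that the pthread library registers
 * at startup.  p_threadstart is later loaded into a new thread's
 * instruction pointer by bsdthread_create(), and p_wqthread is used the
 * same way by setup_wqthread() when a workqueue thread is dispatched.
 */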
int wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
int wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
int wq_max_run_latency_usecs	= WQ_MAX_RUN_LATENCY_USECS;
int wq_timer_interval_msecs	= WQ_TIMER_INTERVAL_MSECS;

SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_run_latency_usecs, CTLFLAG_RW,
	   &wq_max_run_latency_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_timer_interval_msecs, CTLFLAG_RW,
	   &wq_timer_interval_msecs, 0, "");
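/*
 * Usage note (added; not in the original source): SYSCTL_INT registers each
 * knob under the kern node, so these tunables show up as
 * kern.wq_stalled_window_usecs, kern.wq_reduce_pool_window_usecs, and so on,
 * and are read/write (CTLFLAG_RW), e.g.
 *
 *     sysctl kern.wq_timer_interval_msecs
 *     sysctl -w kern.wq_stalled_window_usecs=200
 *
 * The defaults come from the WQ_* constants supplied by the pthread
 * kernel headers.
 */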
void
workqueue_init_lock(proc_t p)
{
    lck_mtx_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr);
}

void
workqueue_destroy_lock(proc_t p)
{
    lck_mtx_destroy(&p->p_wqlock, pthread_lck_grp);
}

static void
workqueue_lock(proc_t p)
{
    lck_mtx_lock(&p->p_wqlock);
}

static void
workqueue_lock_spin(proc_t p)
{
    lck_mtx_lock_spin(&p->p_wqlock);
}

static void
workqueue_unlock(proc_t p)
{
    lck_mtx_unlock(&p->p_wqlock);
}

static void
workqueue_interval_timer_start(thread_call_t call, int interval_in_ms)
{
    uint64_t deadline;

    clock_interval_to_deadline(interval_in_ms, 1000 * 1000, &deadline);

    thread_call_enter_delayed(call, deadline);
}
static void
workqueue_timer(struct workqueue *wq, __unused int param1)
{
    struct timeval tv, dtv;
    uint32_t i;
    boolean_t added_more_threads = FALSE;
    boolean_t reset_maxactive = FALSE;
    boolean_t restart_timer = FALSE;

    microuptime(&tv);

    KERNEL_DEBUG(0xefffd108, (int)wq, 0, 0, 0, 0);

    /*
     * check to see if the stall frequency was beyond our tolerance
     * or we have work on the queue, but haven't scheduled any
     * new work within our acceptable time interval because
     * there were no idle threads left to schedule
     *
     * WQ_TIMER_WATCH will only be set if we have 1 or more affinity
     * groups that have stalled (no active threads and no idle threads)...
     * it will not be set if all affinity groups have at least 1 thread
     * that is currently runnable... if all processors have a runnable
     * thread, there is no need to add more threads even if we're not
     * scheduling new work within our allowed window... it just means
     * that the work items are taking a long time to complete.
     */
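    /*
     * Illustrative scenario (added; not in the original source): with two
     * affinity groups, if group 0 has queued work but no idle and no active
     * threads while group 1 stays busy, the run-next path sets
     * WQ_TIMER_WATCH; once the time since the last scheduled item exceeds
     * wq_stalled_window_usecs, the check below treats that as a stall and
     * grows the thread pool.
     */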
    if (wq->wq_flags & (WQ_ADD_TO_POOL | WQ_TIMER_WATCH)) {

        if (wq->wq_flags & WQ_ADD_TO_POOL)
            added_more_threads = TRUE;
        else {
            timersub(&tv, &wq->wq_lastran_ts, &dtv);

            if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_stalled_window_usecs)
                added_more_threads = TRUE;
        }
        if (added_more_threads == TRUE) {
            for (i = 0; i < wq->wq_affinity_max && wq->wq_nthreads < WORKQUEUE_MAXTHREADS; i++) {
                (void)workqueue_addnewthread(wq);
            }
        }
    }
    timersub(&tv, &wq->wq_reduce_ts, &dtv);

    if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_reduce_pool_window_usecs)
        reset_maxactive = TRUE;

    /*
     * if the pool size has grown beyond the minimum number
     * of threads needed to keep all of the processors busy, and
     * the maximum number of threads scheduled concurrently during
     * the last sample period didn't exceed half the current pool
     * size, then it's time to trim the pool size back
     */
    if (added_more_threads == FALSE &&
        reset_maxactive == TRUE &&
        wq->wq_nthreads > wq->wq_affinity_max &&
        wq->wq_max_threads_scheduled <= (wq->wq_nthreads / 2)) {
        uint32_t nthreads_to_remove;

        if ((nthreads_to_remove = (wq->wq_nthreads / 4)) == 0)
            nthreads_to_remove = 1;

        for (i = 0; i < nthreads_to_remove && wq->wq_nthreads > wq->wq_affinity_max; i++)
            workqueue_removethread(wq);
    }
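    /*
     * Worked example of the trimming rule above (added; not in the original
     * source): on a machine with wq_affinity_max == 8, a pool of 20 threads
     * whose peak concurrency over the sample window was 10 or fewer
     * (<= 20 / 2) gets trimmed by 20 / 4 == 5 threads, and the loop never
     * takes the pool below wq_affinity_max itself.
     */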
    workqueue_lock_spin(wq->wq_proc);

    if (reset_maxactive == TRUE) {
        wq->wq_max_threads_scheduled = 0;
        microuptime(&wq->wq_reduce_ts);
    }
    if (added_more_threads) {
        wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);

        /*
         * since we added more threads, we should be
         * able to run some work if it's still available
         */
        workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
        workqueue_lock_spin(wq->wq_proc);
    }
    if ((wq->wq_nthreads > wq->wq_affinity_max) ||
        (wq->wq_flags & WQ_TIMER_WATCH)) {
        restart_timer = TRUE;
    } else
        wq->wq_flags &= ~WQ_TIMER_RUNNING;

    workqueue_unlock(wq->wq_proc);

    /*
     * we needed to knock down the WQ_TIMER_RUNNING flag while behind
     * the workqueue lock... however, we don't want to hold the lock
     * while restarting the timer and we certainly don't want 2 or more
     * instances of the timer... so set a local to indicate the need
     * for a restart since the state of wq_flags may change once we
     * drop the workqueue lock...
     */
    if (restart_timer == TRUE)
        workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
}
static void
workqueue_callback(int type, thread_t thread)
{
    struct uthread    *uth;
    struct threadlist *tl;
    struct workqueue  *wq;

    uth = get_bsdthread_info(thread);
    tl = uth->uu_threadlist;
    wq = tl->th_workq;

    switch (type) {

    case SCHED_CALL_BLOCK:
        {
        uint32_t old_activecount;

        old_activecount = OSAddAtomic(-1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);

        if (old_activecount == 1 && wq->wq_itemcount) {
            /*
             * we were the last active thread on this affinity set
             * and we've got work to do
             */
            workqueue_lock_spin(wq->wq_proc);
            /*
             * if this thread is blocking (not parking)
             * and the idle list is empty for this affinity group
             * we'll count it as a 'stall'
             */
            if ((tl->th_flags & TH_LIST_RUNNING) &&
                TAILQ_EMPTY(&wq->wq_thidlelist[tl->th_affinity_tag]))
                wq->wq_stalled_count++;

            workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
            /*
             * workqueue_run_nextitem will drop the workqueue
             * lock before it returns
             */
        }
        KERNEL_DEBUG(0xefffd020, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
        }
        break;

    case SCHED_CALL_UNBLOCK:
        /*
         * we cannot take the workqueue_lock here...
         * an UNBLOCK can occur from a timer event which
         * is run from an interrupt context... if the workqueue_lock
         * is already held by this processor, we'll deadlock...
         * the thread lock for the thread being UNBLOCKED
         * is also held
         */
        if (tl->th_unparked)
            OSAddAtomic(-1, (SInt32 *)&tl->th_unparked);
        else
            OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);

        KERNEL_DEBUG(0xefffd024, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
        break;
    }
}
static void
workqueue_removethread(struct workqueue *wq)
{
    struct threadlist *tl;
    uint32_t i, affinity_tag = 0;

    tl = NULL;

    workqueue_lock_spin(wq->wq_proc);

    for (i = 0; i < wq->wq_affinity_max; i++) {

        affinity_tag = wq->wq_nextaffinitytag;

        if (affinity_tag == 0)
            affinity_tag = wq->wq_affinity_max - 1;
        else
            affinity_tag--;
        wq->wq_nextaffinitytag = affinity_tag;

        /*
         * look for an idle thread to steal from this affinity group
         * but don't grab the only thread associated with it
         */
        if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]) && wq->wq_thcount[affinity_tag] > 1) {
            tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
            TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);

            wq->wq_nthreads--;
            wq->wq_thcount[affinity_tag]--;

            break;
        }
    }
    workqueue_unlock(wq->wq_proc);

    if (tl != NULL) {
        thread_sched_call(tl->th_thread, NULL);

        if ( (tl->th_flags & TH_LIST_BLOCKED) )
            wakeup(tl);
        else {
            /*
             * thread was created, but never used...
             * need to clean up the stack and port ourselves
             * since we're not going to spin up through the
             * normal exit path triggered from Libc
             */
            (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
            (void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), (mach_port_name_t)tl->th_thport);

            thread_terminate(tl->th_thread);
        }
        KERNEL_DEBUG(0xefffd030, (int)tl->th_thread, wq->wq_nthreads, tl->th_flags & TH_LIST_BLOCKED, 0, 0);
        /*
         * drop our ref on the thread
         */
        thread_deallocate(tl->th_thread);

        kfree(tl, sizeof(struct threadlist));
    }
}
static int
workqueue_addnewthread(struct workqueue *wq)
{
    struct threadlist *tl;
    struct uthread *uth;
    kern_return_t kret;
    thread_t th;
    proc_t p;
    void *sright;
    mach_vm_offset_t stackaddr;
    uint32_t affinity_tag;

    p = wq->wq_proc;

    kret = thread_create(wq->wq_task, &th);

    if (kret != KERN_SUCCESS)
        return(EINVAL);

    tl = kalloc(sizeof(struct threadlist));
    bzero(tl, sizeof(struct threadlist));

#if defined(__ppc__)
    stackaddr = 0xF0000000;
#elif defined(__i386__)
    stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif
    tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

    kret = mach_vm_map(wq->wq_map, &stackaddr,
                       tl->th_allocsize,
                       page_size - 1,
                       VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
                       0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
                       VM_INHERIT_DEFAULT);

    if (kret != KERN_SUCCESS) {
        kret = mach_vm_allocate(wq->wq_map,
                                &stackaddr, tl->th_allocsize,
                                VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
    }
    if (kret == KERN_SUCCESS) {
        /*
         * The guard page is at the lowest address
         * The stack base is the highest address
         */
        kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

        if (kret != KERN_SUCCESS)
            (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
    }
    if (kret != KERN_SUCCESS) {
        (void) thread_terminate(th);

        kfree(tl, sizeof(struct threadlist));

        return(EINVAL);
    }
    thread_reference(th);

    sright = (void *) convert_thread_to_port(th);
    tl->th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task));

    thread_static_param(th, TRUE);

    workqueue_lock_spin(p);

    affinity_tag = wq->wq_nextaffinitytag;
    wq->wq_nextaffinitytag = (affinity_tag + 1) % wq->wq_affinity_max;

    workqueue_unlock(p);

    tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;

    tl->th_thread = th;
    tl->th_workq = wq;
    tl->th_stackaddr = stackaddr;
    tl->th_affinity_tag = affinity_tag;

#if defined(__ppc__)
    //ml_fp_setvalid(FALSE);
    thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p));
#endif /* __ppc__ */
    /*
     * affinity tag of 0 means no affinity...
     * but we want our tags to be 0 based because they
     * are used to index arrays, so...
     * keep it 0 based internally and bump by 1 when
     * calling out to set it
     */
    (void)thread_affinity_set(th, affinity_tag + 1);
    thread_sched_call(th, workqueue_callback);

    uth = get_bsdthread_info(tl->th_thread);
    uth->uu_threadlist = (void *)tl;

    workqueue_lock_spin(p);

    TAILQ_INSERT_TAIL(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);
    wq->wq_nthreads++;
    wq->wq_thcount[affinity_tag]++;

    KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, (int)current_thread(), affinity_tag, wq->wq_nthreads, 0, (int)tl->th_thread);

    /*
     * work may have come into the queue while
     * no threads were available to run... since
     * we're adding a new thread, go evaluate the
     * current state
     */
    workqueue_run_nextitem(p, THREAD_NULL);
    /*
     * workqueue_run_nextitem is responsible for
     * dropping the workqueue lock in all cases
     */
    return(0);
}
int
workq_open(__unused struct proc *p, __unused struct workq_open_args *uap, __unused register_t *retval)
{
    struct workqueue *wq;
    int size;
    char *ptr;
    uint32_t i;
    int j;
    int error = 0;
    int num_cpus;
    struct workitem *witem;
    struct workitemlist *wl;

    workqueue_lock(p);

    if (p->p_wqptr == NULL) {
        num_cpus = ml_get_max_cpus();

        size = (sizeof(struct workqueue)) +
               (num_cpus * sizeof(int *)) +
               (num_cpus * sizeof(TAILQ_HEAD(, threadlist)));

        ptr = (char *)kalloc(size);
        bzero(ptr, size);

        wq = (struct workqueue *)ptr;
        wq->wq_flags = WQ_LIST_INITED;
        wq->wq_proc = p;
        wq->wq_affinity_max = num_cpus;
        wq->wq_task = current_task();
        wq->wq_map = current_map();

        for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
            wl = (struct workitemlist *)&wq->wq_list[i];
            TAILQ_INIT(&wl->wl_itemlist);
            TAILQ_INIT(&wl->wl_freelist);

            for (j = 0; j < WORKITEM_SIZE; j++) {
                witem = &wq->wq_array[(i*WORKITEM_SIZE) + j];
                TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
            }
        }
        wq->wq_thactivecount = (uint32_t *)((char *)ptr + sizeof(struct workqueue));
        wq->wq_thcount       = (uint32_t *)&wq->wq_thactivecount[wq->wq_affinity_max];
        wq->wq_thidlelist    = (struct wq_thidlelist *)&wq->wq_thcount[wq->wq_affinity_max];

        for (i = 0; i < wq->wq_affinity_max; i++)
            TAILQ_INIT(&wq->wq_thidlelist[i]);

        TAILQ_INIT(&wq->wq_thrunlist);

        p->p_wqptr = (void *)wq;
        p->p_wqsize = size;
        workqueue_unlock(p);

        wq->wq_timer_call = thread_call_allocate((thread_call_func_t)workqueue_timer, (thread_call_param_t)wq);

        for (i = 0; i < wq->wq_affinity_max; i++) {
            (void)workqueue_addnewthread(wq);
        }
        /* If unable to create any threads, return error */
        if (wq->wq_nthreads == 0)
            error = EINVAL;
        workqueue_lock_spin(p);

        microuptime(&wq->wq_reduce_ts);
        microuptime(&wq->wq_lastran_ts);
        wq->wq_max_threads_scheduled = 0;
        wq->wq_stalled_count = 0;
    }
    workqueue_unlock(p);

    return(error);
}
int
workq_ops(struct proc *p, struct workq_ops_args *uap, __unused register_t *retval)
{
    int options = uap->options;
    int prio = uap->prio;	/* should be used to find the right workqueue */
    user_addr_t item = uap->item;
    int error = 0;
    thread_t th = THREAD_NULL;
    struct workqueue *wq;

    prio += 2;	/* normalize prio -2 to +2 to 0 -4 */

    switch (options) {

    case WQOPS_QUEUE_ADD: {

        KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, (int)item, 0, 0, 0, 0);

        workqueue_lock_spin(p);

        if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
            workqueue_unlock(p);
            return (EINVAL);
        }
        error = workqueue_additem(wq, prio, item);
        }
        break;
    case WQOPS_QUEUE_REMOVE: {

        workqueue_lock_spin(p);

        if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
            workqueue_unlock(p);
            return (EINVAL);
        }
        error = workqueue_removeitem(wq, prio, item);
        }
        break;
    case WQOPS_THREAD_RETURN: {

        th = current_thread();

        KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, 0, 0, 0, 0, 0);

        workqueue_lock_spin(p);

        if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
            workqueue_unlock(p);
            return (EINVAL);
        }
        }
        break;
    default:
        return (EINVAL);
    }
    workqueue_run_nextitem(p, th);
    /*
     * workqueue_run_nextitem is responsible for
     * dropping the workqueue lock in all cases
     */
    return(error);
}
void
workqueue_exit(struct proc *p)
{
    struct workqueue *wq;
    struct threadlist *tl, *tlist;
    uint32_t i;

    if (p->p_wqptr != NULL) {

        workqueue_lock_spin(p);

        wq = (struct workqueue *)p->p_wqptr;
        p->p_wqptr = NULL;

        workqueue_unlock(p);

        if (wq->wq_flags & WQ_TIMER_RUNNING)
            thread_call_cancel(wq->wq_timer_call);
        thread_call_free(wq->wq_timer_call);

        TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
            /*
             * drop our last ref on the thread
             */
            thread_sched_call(tl->th_thread, NULL);
            thread_deallocate(tl->th_thread);

            TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
            kfree(tl, sizeof(struct threadlist));
        }
        for (i = 0; i < wq->wq_affinity_max; i++) {
            TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist[i], th_entry, tlist) {
                /*
                 * drop our last ref on the thread
                 */
                thread_sched_call(tl->th_thread, NULL);
                thread_deallocate(tl->th_thread);

                TAILQ_REMOVE(&wq->wq_thidlelist[i], tl, th_entry);
                kfree(tl, sizeof(struct threadlist));
            }
        }
        kfree(wq, p->p_wqsize);
    }
}
static int
workqueue_additem(struct workqueue *wq, int prio, user_addr_t item)
{
    struct workitem *witem;
    struct workitemlist *wl;

    wl = (struct workitemlist *)&wq->wq_list[prio];

    if (TAILQ_EMPTY(&wl->wl_freelist))
        return (ENOMEM);

    witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
    TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);

    witem->wi_item = item;
    TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);

    if (wq->wq_itemcount == 0) {
        microuptime(&wq->wq_lastran_ts);
        wq->wq_stalled_count = 0;
    }
    wq->wq_itemcount++;

    return (0);
}
static int
workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
{
    struct workitem *witem;
    struct workitemlist *wl;
    int error = ESRCH;

    wl = (struct workitemlist *)&wq->wq_list[prio];

    TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
        if (witem->wi_item == item) {
            TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
            wq->wq_itemcount--;

            witem->wi_item = (user_addr_t)0;
            TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

            error = 0;
            break;
        }
    }
    if (wq->wq_itemcount == 0)
        wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);

    return (error);
}
/*
 * workqueue_run_nextitem:
 *   called with the workqueue lock held...
 *   responsible for dropping it in all cases
 */
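/*
 * Calling convention (added note; not in the original source): every caller
 * takes the per-process workqueue lock first and relies on this routine to
 * release it, e.g.
 *
 *     workqueue_lock_spin(p);
 *     workqueue_run_nextitem(p, th);   // lock is dropped before return
 *
 * which is the pattern used by workq_ops(), workqueue_addnewthread() and
 * workqueue_callback() above.
 */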
static void
workqueue_run_nextitem(proc_t p, thread_t thread)
{
    struct workqueue *wq;
    struct workitem *witem = NULL;
    user_addr_t item = 0;
    thread_t th_to_run = THREAD_NULL;
    thread_t th_to_park = THREAD_NULL;
    int wake_thread = 0;
    int reuse_thread = 1;
    uint32_t stalled_affinity_count = 0;
    int i;
    uint32_t affinity_tag;
    struct threadlist *tl = NULL;
    struct uthread *uth = NULL;
    struct workitemlist *wl;
    boolean_t start_timer = FALSE;
    struct timeval tv, lat_tv;
    wq = (struct workqueue *)p->p_wqptr;

    KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, (int)thread, wq->wq_threads_scheduled, wq->wq_stalled_count, 0, 0);

    if (wq->wq_itemcount == 0) {
        if ((th_to_park = thread) == THREAD_NULL)
            goto out;
        goto parkit;
    }
    if (thread != THREAD_NULL) {
        /*
         * we're a worker thread from the pool... currently we
         * are considered 'active' which means we're counted
         * in "wq_thactivecount"
         */
        uth = get_bsdthread_info(thread);
        tl = uth->uu_threadlist;

        if (wq->wq_thactivecount[tl->th_affinity_tag] == 1) {
            /*
             * we're the only active thread associated with our
             * affinity group, so pick up some work and keep going
             */
            th_to_run = thread;
            goto pick_up_work;
        }
    }
    for (affinity_tag = 0; affinity_tag < wq->wq_affinity_max; affinity_tag++) {
        /*
         * look for first affinity group that is currently not active
         * and has at least 1 idle thread
         */
        if (wq->wq_thactivecount[affinity_tag] == 0) {
            if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]))
                break;
            stalled_affinity_count++;
        }
    }
    if (thread == THREAD_NULL) {
        /*
         * we're not one of the 'worker' threads
         */
        if (affinity_tag >= wq->wq_affinity_max) {
            /*
             * we've already got at least 1 thread per
             * affinity group in the active state... or
             * we've got no idle threads to play with
             */
            if (stalled_affinity_count) {

                if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
                    wq->wq_flags |= WQ_TIMER_RUNNING;
                    start_timer = TRUE;
                }
                wq->wq_flags |= WQ_TIMER_WATCH;
            }
            goto out;
        }
    } else {
        /*
         * we're overbooked on the affinity group we're associated with,
         * so park this thread
         */
        th_to_park = thread;

        if (affinity_tag >= wq->wq_affinity_max) {
            /*
             * all the affinity groups have active threads
             * running, or there are no idle threads to
             * schedule
             */
            if (stalled_affinity_count) {

                if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
                    wq->wq_flags |= WQ_TIMER_RUNNING;
                    start_timer = TRUE;
                }
                wq->wq_flags |= WQ_TIMER_WATCH;
            }
            goto parkit;
        }
        /*
         * we've got a candidate (affinity group with no currently
         * active threads) to start a new thread on...
         * we already know there is both work available
         * and an idle thread with the correct affinity tag, so
         * fall into the code that pulls a new thread and workitem...
         * once we've kicked that thread off, we'll park this one
         */
    }
= TAILQ_FIRST(&wq
->wq_thidlelist
[affinity_tag
]);
1820 TAILQ_REMOVE(&wq
->wq_thidlelist
[affinity_tag
], tl
, th_entry
);
1822 th_to_run
= tl
->th_thread
;
1823 TAILQ_INSERT_TAIL(&wq
->wq_thrunlist
, tl
, th_entry
);
1825 if ((tl
->th_flags
& TH_LIST_SUSPENDED
) == TH_LIST_SUSPENDED
) {
1826 tl
->th_flags
&= ~TH_LIST_SUSPENDED
;
1828 } else if ((tl
->th_flags
& TH_LIST_BLOCKED
) == TH_LIST_BLOCKED
) {
1829 tl
->th_flags
&= ~TH_LIST_BLOCKED
;
1832 tl
->th_flags
|= TH_LIST_RUNNING
;
1834 wq
->wq_threads_scheduled
++;
1836 if (wq
->wq_threads_scheduled
> wq
->wq_max_threads_scheduled
)
1837 wq
->wq_max_threads_scheduled
= wq
->wq_threads_scheduled
;
1840 for (i
= 0; i
< WORKQUEUE_NUMPRIOS
; i
++) {
1841 wl
= (struct workitemlist
*)&wq
->wq_list
[i
];
1843 if (!(TAILQ_EMPTY(&wl
->wl_itemlist
))) {
1845 witem
= TAILQ_FIRST(&wl
->wl_itemlist
);
1846 TAILQ_REMOVE(&wl
->wl_itemlist
, witem
, wi_entry
);
1849 item
= witem
->wi_item
;
1850 witem
->wi_item
= (user_addr_t
)0;
1851 TAILQ_INSERT_HEAD(&wl
->wl_freelist
, witem
, wi_entry
);
1857 panic("workq_run_nextitem: NULL witem");
    if (thread != th_to_run) {
        /*
         * we're starting up a thread from a parked/suspended condition
         */
        OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
        OSAddAtomic(1, (SInt32 *)&tl->th_unparked);
    }
    if (wq->wq_itemcount == 0)
        wq->wq_flags &= ~WQ_TIMER_WATCH;
    else {
        microuptime(&tv);
        /*
         * if we had any affinity groups stall (no threads runnable)
         * since we last scheduled an item... and
         * the elapsed time since we last scheduled an item
         * exceeds the latency tolerance...
         * we ask the timer thread (which should already be running)
         * to add some more threads to the pool
         */
        if (wq->wq_stalled_count && !(wq->wq_flags & WQ_ADD_TO_POOL)) {
            timersub(&tv, &wq->wq_lastran_ts, &lat_tv);

            if (((lat_tv.tv_sec * 1000000) + lat_tv.tv_usec) > wq_max_run_latency_usecs)
                wq->wq_flags |= WQ_ADD_TO_POOL;

            KERNEL_DEBUG(0xefffd10c, wq->wq_stalled_count, lat_tv.tv_sec, lat_tv.tv_usec, wq->wq_flags, 0);
        }
        wq->wq_lastran_ts = tv;
    }
    wq->wq_stalled_count = 0;
    workqueue_unlock(p);

    KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[0], wq->wq_thactivecount[1],
                 wq->wq_thactivecount[2], wq->wq_thactivecount[3], 0);

    KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[4], wq->wq_thactivecount[5],
                 wq->wq_thactivecount[6], wq->wq_thactivecount[7], 0);

    /*
     * if current thread is reused for workitem, does not return via unix_syscall
     */
    wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
    if (th_to_park == THREAD_NULL) {

        KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, (int)item, wq->wq_flags, 1, 0);

        return;
    }
    workqueue_lock_spin(p);

parkit:
    wq->wq_threads_scheduled--;
    /*
     * this is a workqueue thread with no more
     * work to do... park it for now
     */
    uth = get_bsdthread_info(th_to_park);
    tl = uth->uu_threadlist;
    if (tl == 0)
        panic("wq thread with no threadlist ");

    TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
    tl->th_flags &= ~TH_LIST_RUNNING;

    tl->th_flags |= TH_LIST_BLOCKED;
    TAILQ_INSERT_HEAD(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);

    assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));

    workqueue_unlock(p);

    if (start_timer)
        workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);

    KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, (int)current_thread(), wq->wq_threads_scheduled, 0, 0, (int)th_to_park);

    thread_block((thread_continue_t)thread_exception_return);

    panic("unexpected return from thread_block");

out:
    workqueue_unlock(p);

    if (start_timer)
        workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);

    KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, 0, wq->wq_flags, 2, 0);
}
static void
wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
	   int reuse_thread, int wake_thread, int return_directly)
{
    int ret = 0;

    KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, (int)current_thread(), (int)item, wake_thread, tl->th_affinity_tag, (int)th);

    ret = setup_wqthread(p, th, item, reuse_thread, tl);

    if (ret != 0)
        panic("setup_wqthread failed %x\n", ret);

    if (return_directly) {
        thread_exception_return();

        panic("wq_runitem: thread_exception_return returned ...\n");
    }
    if (wake_thread) {
        KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);

        wakeup(tl);
    } else {
        KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);

        thread_resume(th);
    }
}
int
setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
{
#if defined(__ppc__)
    /*
     * Set up PowerPC registers...
     * internally they are always kept as 64 bit and
     * since the register set is the same between 32 and 64bit modes
     * we don't need 2 different methods for setting the state
     */
    {
        ppc_thread_state64_t state64;
        ppc_thread_state64_t *ts64 = &state64;

        ts64->srr0 = (uint64_t)p->p_wqthread;
        ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE);
        ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
        ts64->r4 = (uint64_t)((unsigned int)tl->th_thport);
        ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
        ts64->r6 = (uint64_t)item;
        ts64->r7 = (uint64_t)reuse_thread;
        ts64->r8 = (uint64_t)0;

        thread_set_wq_state64(th, (thread_state_t)ts64);
    }
#elif defined(__i386__)
    int isLP64 = 0;

    isLP64 = IS_64BIT_PROCESS(p);
    /*
     * Set up i386 registers & function call.
     */
    if (isLP64 == 0) {
        x86_thread_state32_t state;
        x86_thread_state32_t *ts = &state;

        ts->eip = (int)p->p_wqthread;
        ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
        ts->ebx = (unsigned int)tl->th_thport;
        ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
        ts->edx = (unsigned int)item;
        ts->edi = (unsigned int)reuse_thread;
        ts->esi = (unsigned int)0;

        ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));

        thread_set_wq_state32(th, (thread_state_t)ts);

    } else {
        x86_thread_state64_t state64;
        x86_thread_state64_t *ts64 = &state64;

        ts64->rip = (uint64_t)p->p_wqthread;
        ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
        ts64->rsi = (uint64_t)((unsigned int)(tl->th_thport));
        ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
        ts64->rcx = (uint64_t)item;
        ts64->r8 = (uint64_t)reuse_thread;
        ts64->r9 = (uint64_t)0;
        /*
         * set stack pointer aligned to 16 byte boundary
         */
        ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);

        thread_set_wq_state64(th, (thread_state_t)ts64);
    }
#else
#error setup_wqthread not defined for this architecture
#endif
    return(0);
}