/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
#include <sys/sysproto.h>
#include <sys/pthread_internal.h>
#include <sys/user.h>		/* for coredump */
#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/affinity.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
#include <vm/vm_protos.h>
#include <vm/vm_map.h>		/* for current_map() */
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#if defined(__i386__)
#include <i386/machine_routines.h>
#include <i386/eflags.h>
#endif

#include <libkern/OSAtomic.h>
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1

#if defined(__ppc__) || defined(__ppc64__)
#include <architecture/ppc/cframe.h>
#endif
lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;
lck_mtx_t	*pthread_list_mlock;

extern void pthread_init(void);

extern kern_return_t thread_getstatus(register thread_t act, int flavor,
			thread_state_t tstate, mach_msg_type_number_t *count);
extern kern_return_t thread_setstatus(thread_t thread, int flavor,
			thread_state_t tstate, mach_msg_type_number_t count);
extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item);
static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item);
static void workqueue_run_nextitem(proc_t p, thread_t th);
static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
		       int reuse_thread, int wake_thread, int return_directly);
static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
static int workqueue_addnewthread(struct workqueue *wq);
static void workqueue_removethread(struct workqueue *wq);
static void workqueue_lock(proc_t);
static void workqueue_lock_spin(proc_t);
static void workqueue_unlock(proc_t);
#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128
#define TRUNC_DOWN32(a,c)	((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
#define TRUNC_DOWN64(a,c)	((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
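/*
 * Illustrative sketch (not part of the original file): a minimal user-space
 * demonstration of what TRUNC_DOWN32/TRUNC_DOWN64 compute -- step down from
 * an arbitrary top-of-stack address by 'c' bytes and round the result down
 * to a multiple of 'c'.  The address below is hypothetical; only the macro
 * arithmetic mirrors the definitions above.
 */
#if 0	/* example only -- not compiled into the kernel */
#include <stdint.h>
#include <stdio.h>

#define EX_TRUNC_DOWN64(a,c)	((((uint64_t)(a)) - (c)) & ((uint64_t)(-(c))))

int
main(void)
{
	uint64_t stack_top = 0xB0000123ULL;		/* unaligned top-of-stack */
	uint64_t sp = EX_TRUNC_DOWN64(stack_top, 16);	/* 16-byte aligned, below the top */

	printf("top=0x%llx sp=0x%llx\n",
	    (unsigned long long)stack_top, (unsigned long long)sp);
	return 0;
}
#endif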
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 *
 * (a packing/unpacking sketch follows the SCHED_* definitions below)
 */
void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);

#define PTHREAD_START_CUSTOM	0x01000000
#define PTHREAD_START_SETSCHED	0x02000000
#define PTHREAD_START_DETACHED	0x04000000
#define PTHREAD_START_POLICY_BITSHIFT 16
#define PTHREAD_START_POLICY_MASK 0xff
#define PTHREAD_START_IMPORTANCE_MASK 0xffff

#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR
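/*
 * Illustrative sketch (not part of the original file): how a user-space
 * caller might pack the 32-bit flags word described above, and how the
 * kernel side unpacks it (mirroring the masks/shift defined here).  The
 * sample values are hypothetical; the EX_ macros copy the definitions above
 * so the example is self-contained.
 */
#if 0	/* example only -- not compiled into the kernel */
#include <stdio.h>

#define EX_PTHREAD_START_SETSCHED		0x02000000
#define EX_PTHREAD_START_POLICY_BITSHIFT	16
#define EX_PTHREAD_START_POLICY_MASK		0xff
#define EX_PTHREAD_START_IMPORTANCE_MASK	0xffff

int
main(void)
{
	unsigned int policy = 1;	/* sample scheduling policy number */
	unsigned int importance = 31;	/* 16-bit importance value */

	/* pack: high byte flags, next byte policy, low 16 bits importance */
	unsigned int flags = EX_PTHREAD_START_SETSCHED |
	    ((policy & EX_PTHREAD_START_POLICY_MASK) << EX_PTHREAD_START_POLICY_BITSHIFT) |
	    (importance & EX_PTHREAD_START_IMPORTANCE_MASK);

	/* unpack the same way bsdthread_create() does below */
	unsigned int up_policy = (flags >> EX_PTHREAD_START_POLICY_BITSHIFT) & EX_PTHREAD_START_POLICY_MASK;
	unsigned int up_importance = flags & EX_PTHREAD_START_IMPORTANCE_MASK;

	printf("flags=0x%08x policy=%u importance=%u\n", flags, up_policy, up_importance);
	return 0;
}
#endif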
void
pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();

	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
}

void
pthread_list_lock(void)
{
	lck_mtx_lock(pthread_list_mlock);
}

void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}
int
__pthread_mutex_destroy(__unused struct proc *p, struct __pthread_mutex_destroy_args *uap, __unused register_t *retval)
{
	int res;
	int mutexid = uap->mutexid;
	pthread_mutex_t *mutex;
	lck_mtx_t *lmtx;
	lck_mtx_t *lmtx1;

	mutex = pthread_id_to_mutex(mutexid);
	if (mutex == 0)
		return(EINVAL);

	MTX_LOCK(mutex->lock);
	if (mutex->sig == _PTHREAD_KERN_MUTEX_SIG)
	{
		if (mutex->owner == (thread_t)NULL &&
		    mutex->refcount == 1)
		{
			mutex->sig = _PTHREAD_NO_SIG;
			lmtx = mutex->mutex;
			lmtx1 = mutex->lock;
			pthread_id_mutex_remove(mutexid);
			MTX_UNLOCK(mutex->lock);
			lck_mtx_free(lmtx, pthread_lck_grp);
			lck_mtx_free(lmtx1, pthread_lck_grp);
			kfree((void *)mutex, sizeof(struct _pthread_mutex));
			return(0);
		} else
			res = EBUSY;
	} else
		res = EINVAL;

	MTX_UNLOCK(mutex->lock);
	pthread_mutex_release(mutex);
	return (res);
}
/*
 * Initialize a mutex variable, possibly with additional attributes.
 */
static void
pthread_mutex_init_internal(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
{
	mutex->prioceiling = attr->prioceiling;
	mutex->protocol = attr->protocol;
	mutex->type = attr->type;
	mutex->pshared = attr->pshared;
	mutex->refcount = 0;
	mutex->owner = (thread_t)NULL;
	mutex->owner_proc = current_proc();
	mutex->sig = _PTHREAD_KERN_MUTEX_SIG;
	mutex->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
	mutex->mutex = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
}
/*
 * Initialize a mutex variable, possibly with additional attributes.
 * Public interface - so don't trust the lock - initialize it first.
 */
int
__pthread_mutex_init(__unused struct proc *p, struct __pthread_mutex_init_args *uap, __unused register_t *retval)
{
	user_addr_t umutex = uap->mutex;
	pthread_mutex_t * mutex;
	int mutexid;
	user_addr_t uattr = uap->attr;
	pthread_mutexattr_t attr;
	unsigned int addr = (unsigned int)((uintptr_t)uap->mutex);
	int pmutex_sig;
	int error = 0;

	if ((umutex == 0) || (uattr == 0))
		return(EINVAL);

	if ((error = copyin(uattr, &attr, sizeof(pthread_mutexattr_t))))
		return(error);

	if (attr.sig != _PTHREAD_MUTEX_ATTR_SIG)
		return (EINVAL);

	if ((error = copyin(umutex, &pmutex_sig, sizeof(int))))
		return(error);

	if (pmutex_sig == _PTHREAD_KERN_MUTEX_SIG)
		return(EBUSY);

	mutex = (pthread_mutex_t *)kalloc(sizeof(pthread_mutex_t));

	pthread_mutex_init_internal(mutex, &attr);

	mutexid = pthread_id_mutex_add(mutex);
	if (mutexid) {
		if ((error = copyout(&mutexid, ((user_addr_t)((uintptr_t)(addr))), 4)))
			goto cleanup;
		return(0);
	} else
		error = ENOMEM;
cleanup:
	if (mutexid)
		pthread_id_mutex_remove(mutexid);
	lck_mtx_free(mutex->lock, pthread_lck_grp);
	lck_mtx_free(mutex->mutex, pthread_lck_grp);
	kfree(mutex, sizeof(struct _pthread_mutex));
	return(error);
}
/*
 * TODO: Priority inheritance stuff
 */
int
__pthread_mutex_lock(struct proc *p, struct __pthread_mutex_lock_args *uap, __unused register_t *retval)
{
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;
	int error;

	mutex = pthread_id_to_mutex(mutexid);
	if (mutex == 0)
		return(EINVAL);

	MTX_LOCK(mutex->lock);

	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG) {
		error = EINVAL;
		goto out;
	}

	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
		error = EINVAL;
		goto out;
	}

	MTX_UNLOCK(mutex->lock);

	lck_mtx_lock(mutex->mutex);

	MTX_LOCK(mutex->lock);
	mutex->owner = current_thread();
	error = 0;
out:
	MTX_UNLOCK(mutex->lock);
	pthread_mutex_release(mutex);
	return(error);
}
/*
 * Attempt to lock a mutex, but don't block if this isn't possible.
 */
int
__pthread_mutex_trylock(struct proc *p, struct __pthread_mutex_trylock_args *uap, __unused register_t *retval)
{
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;
	boolean_t state;
	int error;

	mutex = pthread_id_to_mutex(mutexid);
	if (mutex == 0)
		return(EINVAL);

	MTX_LOCK(mutex->lock);

	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG) {
		error = EINVAL;
		goto out;
	}

	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
		error = EINVAL;
		goto out;
	}

	MTX_UNLOCK(mutex->lock);

	state = lck_mtx_try_lock(mutex->mutex);
	if (state) {
		MTX_LOCK(mutex->lock);
		mutex->owner = current_thread();
		MTX_UNLOCK(mutex->lock);
		error = 0;
	} else
		error = EBUSY;

	pthread_mutex_release(mutex);
	return(error);
out:
	MTX_UNLOCK(mutex->lock);
	pthread_mutex_release(mutex);
	return(error);
}
/*
 * TODO: Priority inheritance stuff
 */
int
__pthread_mutex_unlock(struct proc *p, struct __pthread_mutex_unlock_args *uap, __unused register_t *retval)
{
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;
	int error;

	mutex = pthread_id_to_mutex(mutexid);
	if (mutex == 0)
		return(EINVAL);

	MTX_LOCK(mutex->lock);

	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG) {
		error = EINVAL;
		goto out;
	}

	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
		error = EINVAL;
		goto out;
	}

	MTX_UNLOCK(mutex->lock);

	lck_mtx_unlock(mutex->mutex);

	MTX_LOCK(mutex->lock);
	mutex->owner = (thread_t)NULL;
	error = 0;
out:
	MTX_UNLOCK(mutex->lock);
	pthread_mutex_release(mutex);
	return(error);
}
int
__pthread_cond_init(__unused struct proc *p, struct __pthread_cond_init_args *uap, __unused register_t *retval)
{
	pthread_cond_t * cond;
	pthread_condattr_t attr;
	user_addr_t ucond = uap->cond;
	user_addr_t uattr = uap->attr;
	unsigned int addr = (unsigned int)((uintptr_t)uap->cond);
	int condid, error, cond_sig;
	semaphore_t sem;
	kern_return_t kret;
	int value = 0;

	if ((ucond == 0) || (uattr == 0))
		return(EINVAL);

	if ((error = copyin(uattr, &attr, sizeof(pthread_condattr_t))))
		return(error);

	if (attr.sig != _PTHREAD_COND_ATTR_SIG)
		return (EINVAL);

	if ((error = copyin(ucond, &cond_sig, sizeof(int))))
		return(error);

	if (cond_sig == _PTHREAD_KERN_COND_SIG)
		return(EBUSY);

	kret = semaphore_create(kernel_task, &sem, SYNC_POLICY_FIFO, value);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);

	cond = (pthread_cond_t *)kalloc(sizeof(pthread_cond_t));

	cond->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
	cond->pshared = attr.pshared;
	cond->sig = _PTHREAD_KERN_COND_SIG;
	cond->sigpending = 0;
	cond->mutex = (pthread_mutex_t *)0;
	cond->owner_proc = current_proc();
	cond->sem = sem;

	condid = pthread_id_cond_add(cond);
	if (condid) {
		if ((error = copyout(&condid, ((user_addr_t)((uintptr_t)(addr))), 4)))
			goto cleanup;
		return(0);
	} else
		error = ENOMEM;
cleanup:
	if (condid)
		pthread_id_cond_remove(condid);
	semaphore_destroy(kernel_task, cond->sem);
	kfree(cond, sizeof(pthread_cond_t));
	return(error);
}
/*
 * Destroy a condition variable.
 */
int
__pthread_cond_destroy(__unused struct proc *p, struct __pthread_cond_destroy_args *uap, __unused register_t *retval)
{
	pthread_cond_t *cond;
	int condid = uap->condid;
	semaphore_t sem;
	lck_mtx_t * lmtx;
	int res;

	cond = pthread_id_to_cond(condid);
	if (cond == 0)
		return(EINVAL);

	COND_LOCK(cond->lock);
	if (cond->sig == _PTHREAD_KERN_COND_SIG) {
		if (cond->refcount == 1) {
			cond->sig = _PTHREAD_NO_SIG;
			sem = cond->sem;
			lmtx = cond->lock;
			pthread_id_cond_remove(condid);
			COND_UNLOCK(cond->lock);
			lck_mtx_free(lmtx, pthread_lck_grp);
			(void)semaphore_destroy(kernel_task, sem);
			kfree((void *)cond, sizeof(pthread_cond_t));
			return(0);
		} else
			res = EBUSY;
	} else
		res = EINVAL;

	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
	return (res);
}
/*
 * Signal a condition variable, waking up all threads waiting for it.
 */
int
__pthread_cond_broadcast(__unused struct proc *p, struct __pthread_cond_broadcast_args *uap, __unused register_t *retval)
{
	int condid = uap->condid;
	pthread_cond_t * cond;
	int error;
	kern_return_t kret;

	cond = pthread_id_to_cond(condid);
	if (cond == 0)
		return(EINVAL);

	COND_LOCK(cond->lock);

	if (cond->sig != _PTHREAD_KERN_COND_SIG) {
		error = EINVAL;
		goto out;
	}

	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
		error = EINVAL;
		goto out;
	}

	COND_UNLOCK(cond->lock);

	kret = semaphore_signal_all(cond->sem);
	switch (kret) {
	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
		error = EINVAL;
		break;
	case KERN_OPERATION_TIMED_OUT:
		error = EINTR;
		break;
	case KERN_SUCCESS:
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}

	COND_LOCK(cond->lock);
out:
	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
	return (error);
}
/*
 * Signal a condition variable, waking only one thread.
 */
int
__pthread_cond_signal(__unused struct proc *p, struct __pthread_cond_signal_args *uap, __unused register_t *retval)
{
	int condid = uap->condid;
	pthread_cond_t * cond;
	int error;
	kern_return_t kret;

	cond = pthread_id_to_cond(condid);
	if (cond == 0)
		return(EINVAL);

	COND_LOCK(cond->lock);

	if (cond->sig != _PTHREAD_KERN_COND_SIG) {
		error = EINVAL;
		goto out;
	}

	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
		error = EINVAL;
		goto out;
	}

	COND_UNLOCK(cond->lock);

	kret = semaphore_signal(cond->sem);
	switch (kret) {
	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
		error = EINVAL;
		break;
	case KERN_OPERATION_TIMED_OUT:
		error = EINTR;
		break;
	case KERN_SUCCESS:
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}

	COND_LOCK(cond->lock);
out:
	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
	return (error);
}
int
__pthread_cond_wait(__unused struct proc *p, struct __pthread_cond_wait_args *uap, __unused register_t *retval)
{
	int condid = uap->condid;
	pthread_cond_t * cond;
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;
	int error;
	kern_return_t kret;

	cond = pthread_id_to_cond(condid);
	if (cond == 0)
		return(EINVAL);

	mutex = pthread_id_to_mutex(mutexid);
	if (mutex == 0) {
		pthread_cond_release(cond);
		return(EINVAL);
	}
	COND_LOCK(cond->lock);

	if (cond->sig != _PTHREAD_KERN_COND_SIG) {
		error = EINVAL;
		goto out;
	}

	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
		error = EINVAL;
		goto out;
	}

	COND_UNLOCK(cond->lock);

	kret = semaphore_wait(cond->sem);
	switch (kret) {
	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
		error = EINVAL;
		break;
	case KERN_OPERATION_TIMED_OUT:
		error = EINTR;
		break;
	case KERN_SUCCESS:
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}

	COND_LOCK(cond->lock);
out:
	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
	pthread_mutex_release(mutex);
	return (error);
}
int
__pthread_cond_timedwait(__unused struct proc *p, struct __pthread_cond_timedwait_args *uap, __unused register_t *retval)
{
	int condid = uap->condid;
	pthread_cond_t * cond;
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;
	mach_timespec_t absts;
	int error;
	kern_return_t kret;

	if ((error = copyin(uap->abstime, &absts, sizeof(mach_timespec_t))))
		return(error);

	cond = pthread_id_to_cond(condid);
	if (cond == 0)
		return(EINVAL);

	mutex = pthread_id_to_mutex(mutexid);
	if (mutex == 0) {
		pthread_cond_release(cond);
		return(EINVAL);
	}
	COND_LOCK(cond->lock);

	if (cond->sig != _PTHREAD_KERN_COND_SIG) {
		error = EINVAL;
		goto out;
	}

	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
		error = EINVAL;
		goto out;
	}

	COND_UNLOCK(cond->lock);

	kret = semaphore_timedwait(cond->sem, absts);
	switch (kret) {
	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
		error = EINVAL;
		break;
	case KERN_OPERATION_TIMED_OUT:
		error = ETIMEDOUT;
		break;
	case KERN_SUCCESS:
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}

	COND_LOCK(cond->lock);
out:
	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
	pthread_mutex_release(mutex);
	return (error);
}
int
bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval)
{
	kern_return_t kret;
	void * sright;
	int error = 0;
	int allocated = 0;
	mach_vm_offset_t stackaddr;
	mach_vm_size_t th_allocsize = 0;
	mach_vm_size_t user_stacksize;
	mach_vm_size_t th_stacksize;
	mach_vm_offset_t th_stackaddr;
	mach_vm_offset_t th_stack;
	mach_vm_offset_t th_pthread;
	mach_port_t th_thport;
	thread_t th;
	user_addr_t user_func = uap->func;
	user_addr_t user_funcarg = uap->func_arg;
	user_addr_t user_stack = uap->stack;
	user_addr_t user_pthread = uap->pthread;
	unsigned int flags = (unsigned int)uap->flags;
	vm_map_t vmap = current_map();
	task_t ctask = current_task();
	unsigned int policy, importance;
	int isLP64 = 0;

	KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0);

	isLP64 = IS_64BIT_PROCESS(p);

#if defined(__ppc__)
	stackaddr = 0xF0000000;
#elif defined(__i386__)
	stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif

	kret = thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	thread_reference(th);

	sright = (void *) convert_thread_to_port(th);
	th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(ctask));

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		th_stacksize = (mach_vm_size_t)user_stack;	/* if it is custom then it is stacksize */
		th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

		kret = mach_vm_map(vmap, &stackaddr,
				th_allocsize,
				page_size - 1,
				VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
				VM_INHERIT_DEFAULT);
		if (kret != KERN_SUCCESS)
			kret = mach_vm_allocate(vmap,
					&stackaddr, th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
		if (kret != KERN_SUCCESS) {
			error = ENOMEM;
			goto out;
		}
		KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

		allocated = 1;
		th_stackaddr = stackaddr;

		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 * (see the layout sketch following this function)
		 */
		kret = mach_vm_protect(vmap, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);
		if (kret != KERN_SUCCESS) {
			error = ENOMEM;
			goto out1;
		}
		th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		user_stacksize = th_stacksize;
	} else {
		th_stack = user_stack;
		user_stacksize = user_stack;
		th_pthread = user_pthread;
		KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, 0, 0, 0, 3, 0);
	}

#if defined(__ppc__)
	/*
	 * Set up PowerPC registers...
	 * internally they are always kept as 64 bit and
	 * since the register set is the same between 32 and 64bit modes
	 * we don't need 2 different methods for setting the state
	 */
	{
		ppc_thread_state64_t state64;
		ppc_thread_state64_t *ts64 = &state64;

		ts64->srr0 = (uint64_t)p->p_threadstart;
		ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE);
		ts64->r3 = (uint64_t)th_pthread;
		ts64->r4 = (uint64_t)((unsigned int)th_thport);
		ts64->r5 = (uint64_t)user_func;
		ts64->r6 = (uint64_t)user_funcarg;
		ts64->r7 = (uint64_t)user_stacksize;
		ts64->r8 = (uint64_t)uap->flags;

		thread_set_wq_state64(th, (thread_state_t)ts64);

		thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64);
	}
#elif defined(__i386__)
	{
	/*
	 * Set up i386 registers & function call.
	 */
	if (isLP64 == 0) {
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (int)p->p_threadstart;
		ts->eax = (unsigned int)th_pthread;
		ts->ebx = (unsigned int)th_thport;
		ts->ecx = (unsigned int)user_func;
		ts->edx = (unsigned int)user_funcarg;
		ts->edi = (unsigned int)user_stacksize;
		ts->esi = (unsigned int)uap->flags;
		/*
		 * set stack pointer
		 */
		ts->esp = (int)((vm_offset_t)(th_stack - C_32_STK_ALIGN));

		thread_set_wq_state32(th, (thread_state_t)ts);
	} else {
		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)p->p_threadstart;
		ts64->rdi = (uint64_t)th_pthread;
		ts64->rsi = (uint64_t)((unsigned int)(th_thport));
		ts64->rdx = (uint64_t)user_func;
		ts64->rcx = (uint64_t)user_funcarg;
		ts64->r8 = (uint64_t)user_stacksize;
		ts64->r9 = (uint64_t)uap->flags;
		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);

		thread_set_wq_state64(th, (thread_state_t)ts64);
	}
	}
#else
#error bsdthread_create not defined for this architecture
#endif

	/* Set scheduling parameters if needed */
	if ((flags & PTHREAD_START_SETSCHED) != 0) {
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER)
			extinfo.timeshare = 1;
		else
			extinfo.timeshare = 0;
		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

#define BASEPRI_DEFAULT 31
		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	}

	kret = thread_resume(th);
	if (kret != KERN_SUCCESS) {
		error = EINVAL;
		goto out1;
	}
	thread_deallocate(th);	/* drop the creator reference */

	KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, (unsigned int)th_pthread, 0, 0, 0);

	*retval = th_pthread;

	return(0);

out1:
	if (allocated != 0)
		(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
out:
	(void)mach_port_deallocate(get_task_ipcspace(ctask), (mach_port_name_t)th_thport);
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}
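/*
 * Illustrative sketch (not part of the original file): the stack layout that
 * bsdthread_create() and workqueue_addnewthread() carve out of a single
 * mach_vm_map() allocation -- a no-access guard page at the lowest address,
 * the thread stack growing down from the top of the stack region, and the
 * user-visible pthread structure sitting above it.  The sizes below are
 * hypothetical stand-ins for PTH_DEFAULT_GUARDSIZE, the stack size and
 * p->p_pthsize.
 */
#if 0	/* example only -- not compiled into the kernel */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t stackaddr = 0xB0000000ULL;	/* base of the whole allocation */
	uint64_t guardsize = 4096;		/* stand-in for PTH_DEFAULT_GUARDSIZE */
	uint64_t stacksize = 512 * 1024;	/* requested stack size */
	uint64_t pthsize   = 4096;		/* stand-in for p->p_pthsize */

	uint64_t allocsize  = stacksize + guardsize + pthsize;
	uint64_t th_stack   = stackaddr + stacksize + guardsize; /* initial SP, stack grows down */
	uint64_t th_pthread = stackaddr + stacksize + guardsize; /* pthread_t lives above the stack */

	printf("alloc [0x%llx..0x%llx) guard [0x%llx..0x%llx) sp=0x%llx pthread=0x%llx\n",
	    (unsigned long long)stackaddr, (unsigned long long)(stackaddr + allocsize),
	    (unsigned long long)stackaddr, (unsigned long long)(stackaddr + guardsize),
	    (unsigned long long)th_stack, (unsigned long long)th_pthread);
	return 0;
}
#endif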
int
bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	mach_port_name_t kthport = (mach_port_name_t)uap->port;
	mach_port_name_t sem = (mach_port_name_t)uap->sem;

	freeaddr = (mach_vm_offset_t)uap->stackaddr;
	freesize = uap->freesize;

	KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, (unsigned int)freeaddr, (unsigned int)freesize, (unsigned int)kthport, 0xff, 0);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
		if (kret != KERN_SUCCESS) {
			return(EINVAL);
		}
	}
	(void) thread_terminate(current_thread());
	if (sem != MACH_PORT_NULL) {
		kret = semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			return(EINVAL);
		}
	}

	if (kthport != MACH_PORT_NULL)
		mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
	thread_exception_return();
	panic("bsdthread_terminate: still running\n");

	KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_END, 0, 0, 0, 0xff, 0);
	return(0);
}
int
bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused register_t *retval)
{
	/* syscall randomizer test can pass bogus values */
	if (uap->pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	p->p_threadstart = uap->threadstart;
	p->p_wqthread = uap->wqthread;
	p->p_pthsize = uap->pthsize;

	return(0);
}
int wq_stalled_window_usecs	 = WQ_STALLED_WINDOW_USECS;
int wq_reduce_pool_window_usecs	 = WQ_REDUCE_POOL_WINDOW_USECS;
int wq_max_run_latency_usecs	 = WQ_MAX_RUN_LATENCY_USECS;
int wq_timer_interval_msecs	 = WQ_TIMER_INTERVAL_MSECS;

SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_run_latency_usecs, CTLFLAG_RW,
	   &wq_max_run_latency_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_timer_interval_msecs, CTLFLAG_RW,
	   &wq_timer_interval_msecs, 0, "");
void
workqueue_init_lock(proc_t p)
{
	lck_mtx_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr);
}

void
workqueue_destroy_lock(proc_t p)
{
	lck_mtx_destroy(&p->p_wqlock, pthread_lck_grp);
}

static void
workqueue_lock(proc_t p)
{
	lck_mtx_lock(&p->p_wqlock);
}

static void
workqueue_lock_spin(proc_t p)
{
	lck_mtx_lock_spin(&p->p_wqlock);
}

static void
workqueue_unlock(proc_t p)
{
	lck_mtx_unlock(&p->p_wqlock);
}

static void
workqueue_interval_timer_start(thread_call_t call, int interval_in_ms)
{
	uint64_t deadline;

	clock_interval_to_deadline(interval_in_ms, 1000 * 1000, &deadline);

	thread_call_enter_delayed(call, deadline);
}
static void
workqueue_timer(struct workqueue *wq, __unused int param1)
{
	struct timeval tv, dtv;
	uint32_t i;
	boolean_t added_more_threads = FALSE;
	boolean_t reset_maxactive = FALSE;
	boolean_t restart_timer = FALSE;

	microuptime(&tv);

	KERNEL_DEBUG(0xefffd108, (int)wq, 0, 0, 0, 0);

	/*
	 * check to see if the stall frequency was beyond our tolerance
	 * or we have work on the queue, but haven't scheduled any
	 * new work within our acceptable time interval because
	 * there were no idle threads left to schedule
	 *
	 * WQ_TIMER_WATCH will only be set if we have 1 or more affinity
	 * groups that have stalled (no active threads and no idle threads)...
	 * it will not be set if all affinity groups have at least 1 thread
	 * that is currently runnable... if all processors have a runnable
	 * thread, there is no need to add more threads even if we're not
	 * scheduling new work within our allowed window... it just means
	 * that the work items are taking a long time to complete.
	 *
	 * (a user-space sketch of this elapsed-time check follows this function)
	 */
	if (wq->wq_flags & (WQ_ADD_TO_POOL | WQ_TIMER_WATCH)) {

		if (wq->wq_flags & WQ_ADD_TO_POOL)
			added_more_threads = TRUE;
		else {
			timersub(&tv, &wq->wq_lastran_ts, &dtv);

			if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_stalled_window_usecs)
				added_more_threads = TRUE;
		}
		if (added_more_threads == TRUE) {
			for (i = 0; i < wq->wq_affinity_max && wq->wq_nthreads < WORKQUEUE_MAXTHREADS; i++) {
				(void)workqueue_addnewthread(wq);
			}
		}
	}
	timersub(&tv, &wq->wq_reduce_ts, &dtv);

	if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_reduce_pool_window_usecs)
		reset_maxactive = TRUE;

	/*
	 * if the pool size has grown beyond the minimum number
	 * of threads needed to keep all of the processors busy, and
	 * the maximum number of threads scheduled concurrently during
	 * the last sample period didn't exceed half the current pool
	 * size, then its time to trim the pool size back
	 */
	if (added_more_threads == FALSE &&
	    reset_maxactive == TRUE &&
	    wq->wq_nthreads > wq->wq_affinity_max &&
	    wq->wq_max_threads_scheduled <= (wq->wq_nthreads / 2)) {
		uint32_t nthreads_to_remove;

		if ((nthreads_to_remove = (wq->wq_nthreads / 4)) == 0)
			nthreads_to_remove = 1;

		for (i = 0; i < nthreads_to_remove && wq->wq_nthreads > wq->wq_affinity_max; i++)
			workqueue_removethread(wq);
	}
	workqueue_lock_spin(wq->wq_proc);

	if (reset_maxactive == TRUE) {
		wq->wq_max_threads_scheduled = 0;
		microuptime(&wq->wq_reduce_ts);
	}
	if (added_more_threads) {
		wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);

		/*
		 * since we added more threads, we should be
		 * able to run some work if its still available
		 */
		workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
		workqueue_lock_spin(wq->wq_proc);
	}
	if ((wq->wq_nthreads > wq->wq_affinity_max) ||
	    (wq->wq_flags & WQ_TIMER_WATCH)) {
		restart_timer = TRUE;
	} else
		wq->wq_flags &= ~WQ_TIMER_RUNNING;

	workqueue_unlock(wq->wq_proc);

	/*
	 * we needed to knock down the WQ_TIMER_RUNNING flag while behind
	 * the workqueue lock... however, we don't want to hold the lock
	 * while restarting the timer and we certainly don't want 2 or more
	 * instances of the timer... so set a local to indicate the need
	 * for a restart since the state of wq_flags may change once we
	 * drop the workqueue lock...
	 */
	if (restart_timer == TRUE)
		workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
}
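/*
 * Illustrative sketch (not part of the original file): the elapsed-time test
 * used above -- convert the delta between "now" and a recorded timestamp into
 * microseconds and compare it against a window.  The 30000 usec window below
 * is a hypothetical stand-in for wq_stalled_window_usecs.
 */
#if 0	/* example only -- not compiled into the kernel */
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct timeval lastran_ts, tv, dtv;
	long window_usecs = 30000;	/* stand-in for wq_stalled_window_usecs */

	gettimeofday(&lastran_ts, NULL);	/* pretend work was last scheduled here */
	usleep(50000);
	gettimeofday(&tv, NULL);

	timersub(&tv, &lastran_ts, &dtv);

	if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > window_usecs)
		printf("stalled: %ld usecs since last run\n",
		    (long)((dtv.tv_sec * 1000000) + dtv.tv_usec));
	return 0;
}
#endif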
static void
workqueue_callback(int type, thread_t thread)
{
	struct uthread *uth;
	struct threadlist *tl;
	struct workqueue *wq;

	uth = get_bsdthread_info(thread);
	tl = uth->uu_threadlist;
	wq = tl->th_workq;

	switch (type) {

	case SCHED_CALL_BLOCK:
	{
		uint32_t old_activecount;

		old_activecount = OSAddAtomic(-1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);

		if (old_activecount == 1 && wq->wq_itemcount) {
			/*
			 * we were the last active thread on this affinity set
			 * and we've got work to do
			 */
			workqueue_lock_spin(wq->wq_proc);
			/*
			 * if this thread is blocking (not parking)
			 * and the idle list is empty for this affinity group
			 * we'll count it as a 'stall'
			 */
			if ((tl->th_flags & TH_LIST_RUNNING) &&
			    TAILQ_EMPTY(&wq->wq_thidlelist[tl->th_affinity_tag]))
				wq->wq_stalled_count++;

			workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
			/*
			 * workqueue_run_nextitem will drop the workqueue
			 * lock before it returns
			 */
		}
		KERNEL_DEBUG(0xefffd020, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
	}
		break;

	case SCHED_CALL_UNBLOCK:
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 * is also held
		 */
		if (tl->th_unparked)
			OSAddAtomic(-1, (SInt32 *)&tl->th_unparked);
		else
			OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);

		KERNEL_DEBUG(0xefffd024, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
		break;
	}
}
static void
workqueue_removethread(struct workqueue *wq)
{
	struct threadlist *tl;
	uint32_t i, affinity_tag = 0;

	tl = NULL;

	workqueue_lock_spin(wq->wq_proc);

	for (i = 0; i < wq->wq_affinity_max; i++) {

		affinity_tag = wq->wq_nextaffinitytag;

		if (affinity_tag == 0)
			affinity_tag = wq->wq_affinity_max - 1;
		else
			affinity_tag--;
		wq->wq_nextaffinitytag = affinity_tag;

		/*
		 * look for an idle thread to steal from this affinity group
		 * but don't grab the only thread associated with it
		 */
		if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]) && wq->wq_thcount[affinity_tag] > 1) {
			tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
			TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);

			wq->wq_nthreads--;
			wq->wq_thcount[affinity_tag]--;

			break;
		}
	}
	workqueue_unlock(wq->wq_proc);

	if (tl != NULL) {
		thread_sched_call(tl->th_thread, NULL);

		if ( (tl->th_flags & TH_LIST_BLOCKED) )
			wakeup(tl);
		else {
			/*
			 * thread was created, but never used...
			 * need to clean up the stack and port ourselves
			 * since we're not going to spin up through the
			 * normal exit path triggered from Libc
			 */
			(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
			(void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), (mach_port_name_t)tl->th_thport);

			thread_terminate(tl->th_thread);
		}
		KERNEL_DEBUG(0xefffd030, (int)tl->th_thread, wq->wq_nthreads, tl->th_flags & TH_LIST_BLOCKED, 0, 0);
		/*
		 * drop our ref on the thread
		 */
		thread_deallocate(tl->th_thread);

		kfree(tl, sizeof(struct threadlist));
	}
}
static int
workqueue_addnewthread(struct workqueue *wq)
{
	struct threadlist *tl;
	struct uthread *uth;
	kern_return_t kret;
	thread_t th;
	proc_t p;
	void *sright;
	mach_vm_offset_t stackaddr;
	uint32_t affinity_tag;

	p = wq->wq_proc;

	kret = thread_create(wq->wq_task, &th);

	if (kret != KERN_SUCCESS)
		return(EINVAL);

	tl = kalloc(sizeof(struct threadlist));
	bzero(tl, sizeof(struct threadlist));

#if defined(__ppc__)
	stackaddr = 0xF0000000;
#elif defined(__i386__)
	stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif
	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			tl->th_allocsize,
			page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
				&stackaddr, tl->th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}
	if (kret == KERN_SUCCESS) {
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS)
			(void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
	}
	if (kret != KERN_SUCCESS) {
		(void) thread_terminate(th);

		kfree(tl, sizeof(struct threadlist));

		return(EINVAL);
	}
	thread_reference(th);

	sright = (void *) convert_thread_to_port(th);
	tl->th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task));

	thread_static_param(th, TRUE);

	workqueue_lock_spin(p);

	affinity_tag = wq->wq_nextaffinitytag;
	wq->wq_nextaffinitytag = (affinity_tag + 1) % wq->wq_affinity_max;

	workqueue_unlock(p);

	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;

	tl->th_thread = th;
	tl->th_workq = wq;
	tl->th_stackaddr = stackaddr;
	tl->th_affinity_tag = affinity_tag;

#if defined(__ppc__)
	//ml_fp_setvalid(FALSE);
	thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p));
#endif /* __ppc__ */
	/*
	 * affinity tag of 0 means no affinity...
	 * but we want our tags to be 0 based because they
	 * are used to index arrays, so...
	 * keep it 0 based internally and bump by 1 when
	 * calling out to set it
	 * (see the sketch following this function)
	 */
	(void)thread_affinity_set(th, affinity_tag + 1);
	thread_sched_call(th, workqueue_callback);

	uth = get_bsdthread_info(tl->th_thread);
	uth->uu_threadlist = (void *)tl;

	workqueue_lock_spin(p);

	TAILQ_INSERT_TAIL(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);

	wq->wq_nthreads++;
	wq->wq_thcount[affinity_tag]++;

	KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, (int)current_thread(), affinity_tag, wq->wq_nthreads, 0, (int)tl->th_thread);

	/*
	 * work may have come into the queue while
	 * no threads were available to run... since
	 * we're adding a new thread, go evaluate the
	 * current state
	 */
	workqueue_run_nextitem(p, THREAD_NULL);
	/*
	 * workqueue_run_nextitem is responsible for
	 * dropping the workqueue lock in all cases
	 */
	return(0);
}
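/*
 * Illustrative sketch (not part of the original file): the 0-based vs 1-based
 * affinity-tag convention used above.  Tags index per-CPU arrays inside the
 * workqueue, so they are kept 0-based internally; thread_affinity_set()
 * treats 0 as "no affinity", so the tag is bumped by 1 on the way out.  The
 * cpu count below is hypothetical.
 */
#if 0	/* example only -- not compiled into the kernel */
#include <stdio.h>

int
main(void)
{
	unsigned int affinity_max = 4;		/* stand-in for wq_affinity_max */
	unsigned int nextaffinitytag = 0;
	unsigned int i;

	for (i = 0; i < 6; i++) {
		unsigned int tag = nextaffinitytag;

		/* 0-based index for wq_thactivecount[] / wq_thidlelist[]... */
		printf("array index %u -> thread_affinity_set tag %u\n", tag, tag + 1);

		/* ...round-robin over the affinity groups, exactly as above */
		nextaffinitytag = (tag + 1) % affinity_max;
	}
	return 0;
}
#endif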
int
workq_open(__unused struct proc *p, __unused struct workq_open_args *uap, __unused register_t *retval)
{
	struct workqueue * wq;
	int size;
	char * ptr;
	int j;
	uint32_t i;
	int error = 0;
	int num_cpus;
	struct workitem * witem;
	struct workitemlist *wl;

	workqueue_lock(p);

	if (p->p_wqptr == NULL) {
		num_cpus = ml_get_max_cpus();

		size = (sizeof(struct workqueue)) +
		       (num_cpus * sizeof(int *)) +
		       (num_cpus * sizeof(TAILQ_HEAD(, threadlist)));

		ptr = (char *)kalloc(size);
		bzero(ptr, size);

		/*
		 * one allocation holds the workqueue followed by the per-affinity
		 * count arrays and idle-list heads (see the sketch following this
		 * function)
		 */
		wq = (struct workqueue *)ptr;
		wq->wq_flags = WQ_LIST_INITED;
		wq->wq_proc = p;
		wq->wq_affinity_max = num_cpus;
		wq->wq_task = current_task();
		wq->wq_map  = current_map();

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wl = (struct workitemlist *)&wq->wq_list[i];
			TAILQ_INIT(&wl->wl_itemlist);
			TAILQ_INIT(&wl->wl_freelist);

			for (j = 0; j < WORKITEM_SIZE; j++) {
				witem = &wq->wq_array[(i*WORKITEM_SIZE) + j];
				TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
			}
		}
		wq->wq_thactivecount = (uint32_t *)((char *)ptr + sizeof(struct workqueue));
		wq->wq_thcount       = (uint32_t *)&wq->wq_thactivecount[wq->wq_affinity_max];
		wq->wq_thidlelist    = (struct wq_thidlelist *)&wq->wq_thcount[wq->wq_affinity_max];

		for (i = 0; i < wq->wq_affinity_max; i++)
			TAILQ_INIT(&wq->wq_thidlelist[i]);

		TAILQ_INIT(&wq->wq_thrunlist);

		p->p_wqptr = (void *)wq;
		p->p_wqsize = size;

		workqueue_unlock(p);

		wq->wq_timer_call = thread_call_allocate((thread_call_func_t)workqueue_timer, (thread_call_param_t)wq);

		for (i = 0; i < wq->wq_affinity_max; i++) {
			(void)workqueue_addnewthread(wq);
		}
		/* If unable to create any threads, return error */
		if (wq->wq_nthreads == 0)
			error = EINVAL;

		workqueue_lock_spin(p);

		microuptime(&wq->wq_reduce_ts);
		microuptime(&wq->wq_lastran_ts);
		wq->wq_max_threads_scheduled = 0;
		wq->wq_stalled_count = 0;
	}
	workqueue_unlock(p);

	return(error);
}
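/*
 * Illustrative sketch (not part of the original file): the "single kalloc,
 * carve the tail into arrays" layout used by workq_open() above -- one block
 * holds the header followed by two per-CPU counter arrays and the per-CPU
 * idle-list heads.  The structures below are simplified stand-ins for
 * struct workqueue and its TAILQ heads.
 */
#if 0	/* example only -- not compiled into the kernel */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ex_listhead { void *first; void **last; };	/* stand-in for a TAILQ_HEAD */

struct ex_workqueue {
	uint32_t		flags;
	uint32_t		affinity_max;
	uint32_t		*thactivecount;	/* points into the same allocation */
	uint32_t		*thcount;
	struct ex_listhead	*thidlelist;
};

int
main(void)
{
	uint32_t num_cpus = 4;			/* stand-in for ml_get_max_cpus() */
	size_t size = sizeof(struct ex_workqueue) +
	    (num_cpus * 2 * sizeof(uint32_t)) +
	    (num_cpus * sizeof(struct ex_listhead));
	char *ptr = calloc(1, size);
	struct ex_workqueue *wq = (struct ex_workqueue *)ptr;

	wq->affinity_max = num_cpus;
	/* carve the arrays out of the space that follows the header */
	wq->thactivecount = (uint32_t *)(ptr + sizeof(struct ex_workqueue));
	wq->thcount       = &wq->thactivecount[wq->affinity_max];
	wq->thidlelist    = (struct ex_listhead *)&wq->thcount[wq->affinity_max];

	printf("block=%p activecount=%p thcount=%p idlelist=%p\n",
	    (void *)ptr, (void *)wq->thactivecount, (void *)wq->thcount, (void *)wq->thidlelist);
	free(ptr);
	return 0;
}
#endif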
int
workq_ops(struct proc *p, struct workq_ops_args *uap, __unused register_t *retval)
{
	int options	 = uap->options;
	int prio	 = uap->prio;	/* should be used to find the right workqueue */
	user_addr_t item = uap->item;
	int error = 0;
	thread_t th = THREAD_NULL;
	struct workqueue *wq;

	prio += 2;	/* normalize prio -2 to +2 into 0 to 4 */

	switch (options) {

	case WQOPS_QUEUE_ADD: {

		KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, (int)item, 0, 0, 0, 0);

		if ((prio < 0) || (prio >= 5))
			return (EINVAL);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);
			return (EINVAL);
		}
		error = workqueue_additem(wq, prio, item);
		}
		break;
	case WQOPS_QUEUE_REMOVE: {

		if ((prio < 0) || (prio >= 5))
			return (EINVAL);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);
			return (EINVAL);
		}
		error = workqueue_removeitem(wq, prio, item);
		}
		break;
	case WQOPS_THREAD_RETURN: {

		th = current_thread();

		KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, 0, 0, 0, 0, 0);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);
			return (EINVAL);
		}
		}
		break;
	default:
		return (EINVAL);
	}
	workqueue_run_nextitem(p, th);
	/*
	 * workqueue_run_nextitem is responsible for
	 * dropping the workqueue lock in all cases
	 */
	return (error);
}
void
workqueue_exit(struct proc *p)
{
	struct workqueue * wq;
	struct threadlist * tl, *tlist;
	uint32_t i;

	if (p->p_wqptr != NULL) {

		workqueue_lock_spin(p);

		wq = (struct workqueue *)p->p_wqptr;
		p->p_wqptr = NULL;

		workqueue_unlock(p);

		if (wq == NULL)
			return;

		if (wq->wq_flags & WQ_TIMER_RUNNING)
			thread_call_cancel(wq->wq_timer_call);
		thread_call_free(wq->wq_timer_call);

		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
			/*
			 * drop our last ref on the thread
			 */
			thread_sched_call(tl->th_thread, NULL);
			thread_deallocate(tl->th_thread);

			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
			kfree(tl, sizeof(struct threadlist));
		}
		for (i = 0; i < wq->wq_affinity_max; i++) {
			TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist[i], th_entry, tlist) {
				/*
				 * drop our last ref on the thread
				 */
				thread_sched_call(tl->th_thread, NULL);
				thread_deallocate(tl->th_thread);

				TAILQ_REMOVE(&wq->wq_thidlelist[i], tl, th_entry);
				kfree(tl, sizeof(struct threadlist));
			}
		}
		kfree(wq, p->p_wqsize);
		p->p_wqsize = 0;
	}
}
static int
workqueue_additem(struct workqueue *wq, int prio, user_addr_t item)
{
	struct workitem *witem;
	struct workitemlist *wl;

	wl = (struct workitemlist *)&wq->wq_list[prio];

	if (TAILQ_EMPTY(&wl->wl_freelist))
		return (ENOMEM);

	/*
	 * move a workitem from the per-priority freelist to the itemlist
	 * (a user-space sketch of this pattern follows workqueue_removeitem)
	 */
	witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
	TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);

	witem->wi_item = item;
	TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);

	if (wq->wq_itemcount == 0) {
		microuptime(&wq->wq_lastran_ts);
		wq->wq_stalled_count = 0;
	}
	wq->wq_itemcount++;

	return (0);
}
static int
workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
{
	struct workitem *witem;
	struct workitemlist *wl;
	int error = ESRCH;

	wl = (struct workitemlist *)&wq->wq_list[prio];

	TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
		if (witem->wi_item == item) {
			TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
			wq->wq_itemcount--;

			witem->wi_item = (user_addr_t)0;
			TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

			error = 0;
			break;
		}
	}
	if (wq->wq_itemcount == 0)
		wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);

	return (error);
}
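/*
 * Illustrative sketch (not part of the original file): the fixed-size
 * item-pool pattern used by workqueue_additem()/workqueue_removeitem() --
 * entries live in a static array and migrate between a freelist and an
 * itemlist, so enqueue/dequeue never allocates.  Written against the
 * user-space <sys/queue.h> TAILQ macros; the names are hypothetical.
 */
#if 0	/* example only -- not compiled into the kernel */
#include <sys/queue.h>
#include <stdio.h>

struct ex_workitem {
	TAILQ_ENTRY(ex_workitem) wi_entry;
	unsigned long wi_item;
};

TAILQ_HEAD(ex_itemhead, ex_workitem);

int
main(void)
{
	struct ex_workitem pool[8];
	struct ex_itemhead freelist, itemlist;
	struct ex_workitem *witem;
	unsigned int i;

	TAILQ_INIT(&freelist);
	TAILQ_INIT(&itemlist);
	for (i = 0; i < 8; i++)
		TAILQ_INSERT_TAIL(&freelist, &pool[i], wi_entry);

	/* "additem": move an entry from the freelist to the itemlist */
	witem = TAILQ_FIRST(&freelist);
	TAILQ_REMOVE(&freelist, witem, wi_entry);
	witem->wi_item = 0x1234;
	TAILQ_INSERT_TAIL(&itemlist, witem, wi_entry);

	/* "removeitem": put it back at the head of the freelist */
	TAILQ_REMOVE(&itemlist, witem, wi_entry);
	witem->wi_item = 0;
	TAILQ_INSERT_HEAD(&freelist, witem, wi_entry);

	printf("pool cycled one item through the itemlist\n");
	return 0;
}
#endif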
/*
 * workqueue_run_nextitem:
 *   called with the workqueue lock held...
 *   responsible for dropping it in all cases
 */
static void
workqueue_run_nextitem(proc_t p, thread_t thread)
{
	struct workqueue *wq;
	struct workitem *witem = NULL;
	user_addr_t item = 0;
	thread_t th_to_run = THREAD_NULL;
	thread_t th_to_park = THREAD_NULL;
	int wake_thread = 0;
	int reuse_thread = 1;
	uint32_t stalled_affinity_count = 0;
	int i;
	uint32_t affinity_tag;
	struct threadlist *tl = NULL;
	struct uthread *uth = NULL;
	struct workitemlist *wl;
	boolean_t start_timer = FALSE;
	struct timeval tv, lat_tv;

	wq = (struct workqueue *)p->p_wqptr;

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, (int)thread, wq->wq_threads_scheduled, wq->wq_stalled_count, 0, 0);

	if (wq->wq_itemcount == 0) {
		if ((th_to_park = thread) == THREAD_NULL)
			goto out;
		goto parkit;
	}
	if (thread != THREAD_NULL) {
		/*
		 * we're a worker thread from the pool... currently we
		 * are considered 'active' which means we're counted
		 * in "wq_thactivecount"
		 */
		uth = get_bsdthread_info(thread);
		tl = uth->uu_threadlist;

		if (wq->wq_thactivecount[tl->th_affinity_tag] == 1) {
			/*
			 * we're the only active thread associated with our
			 * affinity group, so pick up some work and keep going
			 */
			th_to_run = thread;
			goto pick_up_work;
		}
	}
	for (affinity_tag = 0; affinity_tag < wq->wq_affinity_max; affinity_tag++) {
		/*
		 * look for first affinity group that is currently not active
		 * and has at least 1 idle thread
		 */
		if (wq->wq_thactivecount[affinity_tag] == 0) {
			if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]))
				break;
			stalled_affinity_count++;
		}
	}
	if (thread == THREAD_NULL) {
		/*
		 * we're not one of the 'worker' threads
		 */
		if (affinity_tag >= wq->wq_affinity_max) {
			/*
			 * we've already got at least 1 thread per
			 * affinity group in the active state... or
			 * we've got no idle threads to play with
			 */
			if (stalled_affinity_count) {

				if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
					wq->wq_flags |= WQ_TIMER_RUNNING;
					start_timer = TRUE;
				}
				wq->wq_flags |= WQ_TIMER_WATCH;
			}
			goto out;
		}
	} else {
		/*
		 * we're overbooked on the affinity group we're associated with,
		 * so park this thread
		 */
		th_to_park = thread;

		if (affinity_tag >= wq->wq_affinity_max) {
			/*
			 * all the affinity groups have active threads
			 * running, or there are no idle threads to
			 * schedule
			 */
			if (stalled_affinity_count) {

				if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
					wq->wq_flags |= WQ_TIMER_RUNNING;
					start_timer = TRUE;
				}
				wq->wq_flags |= WQ_TIMER_WATCH;
			}
			goto parkit;
		}
		/*
		 * we've got a candidate (affinity group with no currently
		 * active threads) to start a new thread on...
		 * we already know there is both work available
		 * and an idle thread with the correct affinity tag, so
		 * fall into the code that pulls a new thread and workitem...
		 * once we've kicked that thread off, we'll park this one
		 */
	}
	tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
	TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);

	th_to_run = tl->th_thread;
	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
		tl->th_flags &= ~TH_LIST_SUSPENDED;
		reuse_thread = 0;
	} else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
		tl->th_flags &= ~TH_LIST_BLOCKED;
		wake_thread = 1;
	}
	tl->th_flags |= TH_LIST_RUNNING;

	wq->wq_threads_scheduled++;

	if (wq->wq_threads_scheduled > wq->wq_max_threads_scheduled)
		wq->wq_max_threads_scheduled = wq->wq_threads_scheduled;

pick_up_work:
	for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
		wl = (struct workitemlist *)&wq->wq_list[i];

		if (!(TAILQ_EMPTY(&wl->wl_itemlist))) {

			witem = TAILQ_FIRST(&wl->wl_itemlist);
			TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
			wq->wq_itemcount--;

			item = witem->wi_item;
			witem->wi_item = (user_addr_t)0;
			TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

			break;
		}
	}
	if (witem == NULL)
		panic("workq_run_nextitem: NULL witem");

	if (thread != th_to_run) {
		/*
		 * we're starting up a thread from a parked/suspended condition
		 */
		OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
		OSAddAtomic(1, (SInt32 *)&tl->th_unparked);
	}
	if (wq->wq_itemcount == 0)
		wq->wq_flags &= ~WQ_TIMER_WATCH;
	else {
		microuptime(&tv);
		/*
		 * if we had any affinity groups stall (no threads runnable)
		 * since we last scheduled an item... and
		 * the elapsed time since we last scheduled an item
		 * exceeds the latency tolerance...
		 * we ask the timer thread (which should already be running)
		 * to add some more threads to the pool
		 */
		if (wq->wq_stalled_count && !(wq->wq_flags & WQ_ADD_TO_POOL)) {
			timersub(&tv, &wq->wq_lastran_ts, &lat_tv);

			if (((lat_tv.tv_sec * 1000000) + lat_tv.tv_usec) > wq_max_run_latency_usecs)
				wq->wq_flags |= WQ_ADD_TO_POOL;

			KERNEL_DEBUG(0xefffd10c, wq->wq_stalled_count, lat_tv.tv_sec, lat_tv.tv_usec, wq->wq_flags, 0);
		}
		wq->wq_lastran_ts = tv;
	}
	wq->wq_stalled_count = 0;
	workqueue_unlock(p);

	KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[0], wq->wq_thactivecount[1],
		     wq->wq_thactivecount[2], wq->wq_thactivecount[3], 0);

	KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[4], wq->wq_thactivecount[5],
		     wq->wq_thactivecount[6], wq->wq_thactivecount[7], 0);

	/*
	 * if current thread is reused for workitem, does not return via unix_syscall
	 */
	wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));

	if (th_to_park == THREAD_NULL) {

		KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, (int)item, wq->wq_flags, 1, 0);

		return;
	}
	workqueue_lock_spin(p);

parkit:
	wq->wq_threads_scheduled--;
	/*
	 * this is a workqueue thread with no more
	 * work to do... park it for now
	 */
	uth = get_bsdthread_info(th_to_park);
	tl = uth->uu_threadlist;
	if (tl == 0)
		panic("wq thread with no threadlist ");

	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
	tl->th_flags &= ~TH_LIST_RUNNING;

	tl->th_flags |= TH_LIST_BLOCKED;
	TAILQ_INSERT_HEAD(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);

	assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));

	workqueue_unlock(p);

	if (start_timer)
		workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);

	KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, (int)current_thread(), wq->wq_threads_scheduled, 0, 0, (int)th_to_park);

	thread_block((thread_continue_t)thread_exception_return);

	panic("unexpected return from thread_block");

out:
	workqueue_unlock(p);

	if (start_timer)
		workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, 0, wq->wq_flags, 2, 0);

	return;
}
static void
wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
	   int reuse_thread, int wake_thread, int return_directly)
{
	int ret = 0;

	KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, (int)current_thread(), (int)item, wake_thread, tl->th_affinity_tag, (int)th);

	ret = setup_wqthread(p, th, item, reuse_thread, tl);

	if (ret != 0)
		panic("setup_wqthread failed %x\n", ret);

	if (return_directly) {
		thread_exception_return();

		panic("wq_runitem: thread_exception_return returned ...\n");
	}
	if (wake_thread) {
		KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);

		wakeup(tl);
	} else {
		KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);

		thread_resume(th);
	}
}
int
setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
{
#if defined(__ppc__)
	/*
	 * Set up PowerPC registers...
	 * internally they are always kept as 64 bit and
	 * since the register set is the same between 32 and 64bit modes
	 * we don't need 2 different methods for setting the state
	 */
	{
		ppc_thread_state64_t state64;
		ppc_thread_state64_t *ts64 = &state64;

		ts64->srr0 = (uint64_t)p->p_wqthread;
		ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE);
		ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
		ts64->r4 = (uint64_t)((unsigned int)tl->th_thport);
		ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
		ts64->r6 = (uint64_t)item;
		ts64->r7 = (uint64_t)reuse_thread;
		ts64->r8 = (uint64_t)0;

		thread_set_wq_state64(th, (thread_state_t)ts64);
	}
#elif defined(__i386__)
	int isLP64 = 0;

	isLP64 = IS_64BIT_PROCESS(p);
	/*
	 * Set up i386 registers & function call.
	 */
	if (isLP64 == 0) {
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (int)p->p_wqthread;
		ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
		ts->ebx = (unsigned int)tl->th_thport;
		ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
		ts->edx = (unsigned int)item;
		ts->edi = (unsigned int)reuse_thread;
		ts->esi = (unsigned int)0;
		/*
		 * set stack pointer
		 */
		ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));

		thread_set_wq_state32(th, (thread_state_t)ts);
	} else {
		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)p->p_wqthread;
		ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
		ts64->rsi = (uint64_t)((unsigned int)(tl->th_thport));
		ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
		ts64->rcx = (uint64_t)item;
		ts64->r8 = (uint64_t)reuse_thread;
		ts64->r9 = (uint64_t)0;
		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);

		thread_set_wq_state64(th, (thread_state_t)ts64);
	}
#else
#error setup_wqthread not defined for this architecture
#endif
	return(0);
}