/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
#include <sys/sysproto.h>
#include <sys/pthread_internal.h>
#include <sys/user.h>		/* for coredump */

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/affinity.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
#include <vm/vm_protos.h>
#include <vm/vm_map.h>		/* for current_map() */
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>

#include <i386/machine_routines.h>
#include <i386/eflags.h>

#include <libkern/OSAtomic.h>

#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1

#if defined(__ppc__) || defined(__ppc64__)
#include <architecture/ppc/cframe.h>
#endif
lck_grp_attr_t   *pthread_lck_grp_attr;
lck_grp_t    *pthread_lck_grp;
lck_attr_t   *pthread_lck_attr;
lck_mtx_t * pthread_list_mlock;

extern void pthread_init(void);
extern kern_return_t thread_getstatus(register thread_t act, int flavor,
			thread_state_t tstate, mach_msg_type_number_t *count);
extern kern_return_t thread_setstatus(thread_t thread, int flavor,
			thread_state_t tstate, mach_msg_type_number_t count);
extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item);
static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item);
static void workqueue_run_nextitem(proc_t p, thread_t th);
static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
		       int reuse_thread, int wake_thread, int return_directly);
static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
static int workqueue_addnewthread(struct workqueue *wq);
static void workqueue_removethread(struct workqueue *wq);
static void workqueue_lock(proc_t);
static void workqueue_lock_spin(proc_t);
static void workqueue_unlock(proc_t);
#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128
#define TRUNC_DOWN32(a,c)	((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
#define TRUNC_DOWN64(a,c)	((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
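
/*
 * Illustrative note (added, not part of the original source): TRUNC_DOWN32/64
 * first back off by 'c' bytes and then round the result down to a 'c'-byte
 * boundary, e.g. TRUNC_DOWN32(0x1001, 16) == 0x0ff0.
 */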
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * |  flags(8) | policy(8) |  importance(16) |
 * -----------------------------------------
 */
void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);

#define PTHREAD_START_CUSTOM	0x01000000
#define PTHREAD_START_SETSCHED	0x02000000
#define PTHREAD_START_DETACHED	0x04000000
#define PTHREAD_START_POLICY_BITSHIFT 16
#define PTHREAD_START_POLICY_MASK 0xffff
#define PTHREAD_START_IMPORTANCE_MASK 0xffff

#define SCHED_OTHER      POLICY_TIMESHARE
#define SCHED_FIFO       POLICY_FIFO
#define SCHED_RR         POLICY_RR
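
/*
 * Illustrative example (added, not from the original source): given the layout
 * documented above, a caller could pack the flags word roughly as
 *
 *	unsigned int flags = PTHREAD_START_SETSCHED |
 *	    ((SCHED_RR & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT) |
 *	    (importance & PTHREAD_START_IMPORTANCE_MASK);
 *
 * which bsdthread_create() below unpacks with PTHREAD_START_POLICY_BITSHIFT
 * and the two masks.
 */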
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();

	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

pthread_list_lock(void)
{
	lck_mtx_lock(pthread_list_mlock);
}

pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}
__pthread_mutex_destroy(__unused struct proc *p, struct __pthread_mutex_destroy_args *uap, __unused register_t *retval)
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;

	mutex = pthread_id_to_mutex(mutexid);

	MTX_LOCK(mutex->lock);
	if (mutex->sig == _PTHREAD_KERN_MUTEX_SIG)
		if (mutex->owner == (thread_t)NULL &&
		    mutex->refcount == 1)
			mutex->sig = _PTHREAD_NO_SIG;
			pthread_id_mutex_remove(mutexid);
			MTX_UNLOCK(mutex->lock);
			lck_mtx_free(lmtx, pthread_lck_grp);
			lck_mtx_free(lmtx1, pthread_lck_grp);
			kfree((void *)mutex, sizeof(struct _pthread_mutex));

	MTX_UNLOCK(mutex->lock);
	pthread_mutex_release(mutex);
/*
 * Initialize a mutex variable, possibly with additional attributes.
 */
pthread_mutex_init_internal(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
	mutex->prioceiling = attr->prioceiling;
	mutex->protocol = attr->protocol;
	mutex->type = attr->type;
	mutex->pshared = attr->pshared;
	mutex->owner = (thread_t)NULL;
	mutex->owner_proc = current_proc();
	mutex->sig = _PTHREAD_KERN_MUTEX_SIG;
	mutex->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
	mutex->mutex = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
/*
 * Initialize a mutex variable, possibly with additional attributes.
 * Public interface - so don't trust the lock - initialize it first.
 */
__pthread_mutex_init(__unused struct proc *p, struct __pthread_mutex_init_args *uap, __unused register_t *retval)
	user_addr_t umutex = uap->mutex;
	pthread_mutex_t * mutex;
	user_addr_t uattr = uap->attr;
	pthread_mutexattr_t attr;
	unsigned int addr = (unsigned int)((uintptr_t)uap->mutex);

	if ((umutex == 0) || (uattr == 0))

	if ((error = copyin(uattr, &attr, sizeof(pthread_mutexattr_t))))

	if (attr.sig != _PTHREAD_MUTEX_ATTR_SIG)

	if ((error = copyin(umutex, &pmutex_sig, sizeof(int))))

	if (pmutex_sig == _PTHREAD_KERN_MUTEX_SIG)

	mutex = (pthread_mutex_t *)kalloc(sizeof(pthread_mutex_t));

	pthread_mutex_init_internal(mutex, &attr);

	mutexid = pthread_id_mutex_add(mutex);

	if ((error = copyout(&mutexid, ((user_addr_t)((uintptr_t)(addr))), 4)))

	pthread_id_mutex_remove(mutexid);
	lck_mtx_free(mutex->lock, pthread_lck_grp);
	lck_mtx_free(mutex->mutex, pthread_lck_grp);
	kfree(mutex, sizeof(struct _pthread_mutex));
/*
 * TODO: Priority inheritance stuff
 */
__pthread_mutex_lock(struct proc *p, struct __pthread_mutex_lock_args *uap, __unused register_t *retval)
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;

	mutex = pthread_id_to_mutex(mutexid);

	MTX_LOCK(mutex->lock);

	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)

	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {

	MTX_UNLOCK(mutex->lock);

	lck_mtx_lock(mutex->mutex);

	MTX_LOCK(mutex->lock);
	mutex->owner = current_thread();

	MTX_UNLOCK(mutex->lock);
	pthread_mutex_release(mutex);
/*
 * Attempt to lock a mutex, but don't block if this isn't possible.
 */
__pthread_mutex_trylock(struct proc *p, struct __pthread_mutex_trylock_args *uap, __unused register_t *retval)
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;

	mutex = pthread_id_to_mutex(mutexid);

	MTX_LOCK(mutex->lock);

	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)

	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {

	MTX_UNLOCK(mutex->lock);

	state = lck_mtx_try_lock(mutex->mutex);

	MTX_LOCK(mutex->lock);
	mutex->owner = current_thread();
	MTX_UNLOCK(mutex->lock);

	pthread_mutex_release(mutex);

	MTX_UNLOCK(mutex->lock);
	pthread_mutex_release(mutex);
/*
 * TODO: Priority inheritance stuff
 */
__pthread_mutex_unlock(struct proc *p, struct __pthread_mutex_unlock_args *uap, __unused register_t *retval)
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;

	mutex = pthread_id_to_mutex(mutexid);

	MTX_LOCK(mutex->lock);

	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)

	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {

	MTX_UNLOCK(mutex->lock);

	lck_mtx_unlock(mutex->mutex);

	MTX_LOCK(mutex->lock);

	MTX_UNLOCK(mutex->lock);
	pthread_mutex_release(mutex);
__pthread_cond_init(__unused struct proc *p, struct __pthread_cond_init_args *uap, __unused register_t *retval)
	pthread_cond_t * cond;
	pthread_condattr_t attr;
	user_addr_t ucond = uap->cond;
	user_addr_t uattr = uap->attr;
	unsigned int addr = (unsigned int)((uintptr_t)uap->cond);
	int condid, error, cond_sig;

	if ((ucond == 0) || (uattr == 0))

	if ((error = copyin(uattr, &attr, sizeof(pthread_condattr_t))))

	if (attr.sig != _PTHREAD_COND_ATTR_SIG)

	if ((error = copyin(ucond, &cond_sig, sizeof(int))))

	if (cond_sig == _PTHREAD_KERN_COND_SIG)

	kret = semaphore_create(kernel_task, &sem, SYNC_POLICY_FIFO, value);
	if (kret != KERN_SUCCESS)

	cond = (pthread_cond_t *)kalloc(sizeof(pthread_cond_t));

	cond->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
	cond->pshared = attr.pshared;
	cond->sig = _PTHREAD_KERN_COND_SIG;
	cond->sigpending = 0;
	cond->mutex = (pthread_mutex_t *)0;
	cond->owner_proc = current_proc();

	condid = pthread_id_cond_add(cond);

	if ((error = copyout(&condid, ((user_addr_t)((uintptr_t)(addr))), 4)))

	pthread_id_cond_remove(condid);
	semaphore_destroy(kernel_task, cond->sem);
	kfree(cond, sizeof(pthread_cond_t));
/*
 * Destroy a condition variable.
 */
__pthread_cond_destroy(__unused struct proc *p, struct __pthread_cond_destroy_args *uap, __unused register_t *retval)
	pthread_cond_t *cond;
	int condid = uap->condid;

	cond = pthread_id_to_cond(condid);

	COND_LOCK(cond->lock);
	if (cond->sig == _PTHREAD_KERN_COND_SIG)
		if (cond->refcount == 1)
			cond->sig = _PTHREAD_NO_SIG;
			pthread_id_cond_remove(condid);
			COND_UNLOCK(cond->lock);
			lck_mtx_free(lmtx, pthread_lck_grp);
			(void)semaphore_destroy(kernel_task, sem);
			kfree((void *)cond, sizeof(pthread_cond_t));

	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
/*
 * Signal a condition variable, waking up all threads waiting for it.
 */
__pthread_cond_broadcast(__unused struct proc *p, struct __pthread_cond_broadcast_args *uap, __unused register_t *retval)
	int condid = uap->condid;
	pthread_cond_t * cond;

	cond = pthread_id_to_cond(condid);

	COND_LOCK(cond->lock);

	if (cond->sig != _PTHREAD_KERN_COND_SIG)

	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {

	COND_UNLOCK(cond->lock);

	kret = semaphore_signal_all(cond->sem);
	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
	case KERN_OPERATION_TIMED_OUT:

	COND_LOCK(cond->lock);

	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
/*
 * Signal a condition variable, waking only one thread.
 */
__pthread_cond_signal(__unused struct proc *p, struct __pthread_cond_signal_args *uap, __unused register_t *retval)
	int condid = uap->condid;
	pthread_cond_t * cond;

	cond = pthread_id_to_cond(condid);

	COND_LOCK(cond->lock);

	if (cond->sig != _PTHREAD_KERN_COND_SIG)

	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {

	COND_UNLOCK(cond->lock);

	kret = semaphore_signal(cond->sem);
	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
	case KERN_OPERATION_TIMED_OUT:

	COND_LOCK(cond->lock);

	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
__pthread_cond_wait(__unused struct proc *p, struct __pthread_cond_wait_args *uap, __unused register_t *retval)
	int condid = uap->condid;
	pthread_cond_t * cond;
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;

	cond = pthread_id_to_cond(condid);

	mutex = pthread_id_to_mutex(mutexid);
	pthread_cond_release(cond);

	COND_LOCK(cond->lock);

	if (cond->sig != _PTHREAD_KERN_COND_SIG)

	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {

	COND_UNLOCK(cond->lock);

	kret = semaphore_wait(cond->sem);
	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
	case KERN_OPERATION_TIMED_OUT:

	COND_LOCK(cond->lock);

	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
	pthread_mutex_release(mutex);
__pthread_cond_timedwait(__unused struct proc *p, struct __pthread_cond_timedwait_args *uap, __unused register_t *retval)
	int condid = uap->condid;
	pthread_cond_t * cond;
	int mutexid = uap->mutexid;
	pthread_mutex_t * mutex;
	mach_timespec_t absts;

	if ((error = copyin(uap->abstime, &absts, sizeof(mach_timespec_t))))

	cond = pthread_id_to_cond(condid);

	mutex = pthread_id_to_mutex(mutexid);
	pthread_cond_release(cond);

	COND_LOCK(cond->lock);

	if (cond->sig != _PTHREAD_KERN_COND_SIG)

	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {

	COND_UNLOCK(cond->lock);

	kret = semaphore_timedwait(cond->sem, absts);
	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
	case KERN_OPERATION_TIMED_OUT:

	COND_LOCK(cond->lock);

	COND_UNLOCK(cond->lock);
	pthread_cond_release(cond);
	pthread_mutex_release(mutex);
bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval)
	mach_vm_offset_t stackaddr;
	mach_vm_size_t th_allocsize = 0;
	mach_vm_size_t user_stacksize;
	mach_vm_size_t th_stacksize;
	mach_vm_offset_t th_stackaddr;
	mach_vm_offset_t th_stack;
	mach_vm_offset_t th_pthread;
	mach_port_t th_thport;
	user_addr_t user_func = uap->func;
	user_addr_t user_funcarg = uap->func_arg;
	user_addr_t user_stack = uap->stack;
	user_addr_t user_pthread = uap->pthread;
	unsigned int flags = (unsigned int)uap->flags;
	vm_map_t vmap = current_map();
	task_t ctask = current_task();
	unsigned int policy, importance;

	KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0);

	isLP64 = IS_64BIT_PROCESS(p);

#if defined(__ppc__)
	stackaddr = 0xF0000000;
#elif defined(__i386__)
	stackaddr = 0xB0000000;
#elif defined(__arm__)
	stackaddr = 0xB0000000;		/* XXX ARM */
#else
#error Need to define a stack address hint for this architecture
#endif
	kret = thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)

	thread_reference(th);

	sright = (void *) convert_thread_to_port(th);
	th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(ctask));

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		th_stacksize = (mach_vm_size_t)user_stack;	/* if it is custom then it is stacksize */
		th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

		kret = mach_vm_map(vmap, &stackaddr,
				VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
		if (kret != KERN_SUCCESS)
			kret = mach_vm_allocate(vmap,
					&stackaddr, th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
		if (kret != KERN_SUCCESS) {

		KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

		th_stackaddr = stackaddr;

		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(vmap, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS) {

		th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		user_stacksize = th_stacksize;
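
		/*
		 * Illustrative layout sketch (added comment, not in the original
		 * source): th_allocsize = guard + stack + pthread struct, i.e.
		 *
		 *  stackaddr                                      stackaddr + th_allocsize
		 *  | guard (PTH_DEFAULT_GUARDSIZE) | stack, grows down (th_stacksize) | pthread_t area (p_pthsize) |
		 *
		 * which is why th_stack and th_pthread above both point at
		 * stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE.
		 */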
		th_stack = user_stack;
		user_stacksize = user_stack;
		th_pthread = user_pthread;

		KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, 0, 0, 0, 3, 0);

#if defined(__ppc__)
	/*
	 * Set up PowerPC registers...
	 * internally they are always kept as 64 bit and
	 * since the register set is the same between 32 and 64bit modes
	 * we don't need 2 different methods for setting the state
	 */
	ppc_thread_state64_t state64;
	ppc_thread_state64_t *ts64 = &state64;

	ts64->srr0 = (uint64_t)p->p_threadstart;
	ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE);
	ts64->r3 = (uint64_t)th_pthread;
	ts64->r4 = (uint64_t)((unsigned int)th_thport);
	ts64->r5 = (uint64_t)user_func;
	ts64->r6 = (uint64_t)user_funcarg;
	ts64->r7 = (uint64_t)user_stacksize;
	ts64->r8 = (uint64_t)uap->flags;

	thread_set_wq_state64(th, (thread_state_t)ts64);

	thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64);

#elif defined(__i386__)
	/*
	 * Set up i386 registers & function call.
	 */
	x86_thread_state32_t state;
	x86_thread_state32_t *ts = &state;

	ts->eip = (int)p->p_threadstart;
	ts->eax = (unsigned int)th_pthread;
	ts->ebx = (unsigned int)th_thport;
	ts->ecx = (unsigned int)user_func;
	ts->edx = (unsigned int)user_funcarg;
	ts->edi = (unsigned int)user_stacksize;
	ts->esi = (unsigned int)uap->flags;

	ts->esp = (int)((vm_offset_t)(th_stack - C_32_STK_ALIGN));

	thread_set_wq_state32(th, (thread_state_t)ts);

	x86_thread_state64_t state64;
	x86_thread_state64_t *ts64 = &state64;

	ts64->rip = (uint64_t)p->p_threadstart;
	ts64->rdi = (uint64_t)th_pthread;
	ts64->rsi = (uint64_t)((unsigned int)(th_thport));
	ts64->rdx = (uint64_t)user_func;
	ts64->rcx = (uint64_t)user_funcarg;
	ts64->r8 = (uint64_t)user_stacksize;
	ts64->r9 = (uint64_t)uap->flags;
	/*
	 * set stack pointer aligned to 16 byte boundary
	 */
	ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);

	thread_set_wq_state64(th, (thread_state_t)ts64);

#elif defined(__arm__)
	int flavor = 0, count = 0;

	kret = thread_getstatus(th, flavor, (thread_state_t)&state, &count);
	if (kret != KERN_SUCCESS) {

	kret = thread_setstatus(th, flavor, (thread_state_t)&state, count);
	if (kret != KERN_SUCCESS)

#else
#error bsdthread_create not defined for this architecture
#endif
	/* Set scheduling parameters if needed */
	if ((flags & PTHREAD_START_SETSCHED) != 0) {
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER)
			extinfo.timeshare = 1;
			extinfo.timeshare = 0;
		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		precedinfo.importance = importance;
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);

	kret = thread_resume(th);
	if (kret != KERN_SUCCESS) {

	thread_deallocate(th);	/* drop the creator reference */

	KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, (unsigned int)th_pthread, 0, 0, 0);

	*retval = th_pthread;

	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);

	(void)mach_port_deallocate(get_task_ipcspace(ctask), (mach_port_name_t)th_thport);
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused register_t *retval)
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	mach_port_name_t kthport = (mach_port_name_t)uap->port;
	mach_port_name_t sem = (mach_port_name_t)uap->sem;

	freeaddr = (mach_vm_offset_t)uap->stackaddr;
	freesize = uap->freesize;

	KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, (unsigned int)freeaddr, (unsigned int)freesize, (unsigned int)kthport, 0xff, 0);

	if (sem != MACH_PORT_NULL) {
		kret = semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
		if (kret != KERN_SUCCESS) {

	(void) thread_terminate(current_thread());
	if (kthport != MACH_PORT_NULL)
		mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
	thread_exception_return();
	panic("bsdthread_terminate: still running\n");

	KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_END, 0, 0, 0, 0xff, 0);
bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused register_t *retval)
	/* syscall randomizer test can pass bogus values */
	if (uap->pthsize > MAX_PTHREAD_SIZE) {

	p->p_threadstart = uap->threadstart;
	p->p_wqthread = uap->wqthread;
	p->p_pthsize = uap->pthsize;
int wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
int wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
int wq_max_run_latency_usecs	= WQ_MAX_RUN_LATENCY_USECS;
int wq_timer_interval_msecs	= WQ_TIMER_INTERVAL_MSECS;

SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_run_latency_usecs, CTLFLAG_RW,
	   &wq_max_run_latency_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_timer_interval_msecs, CTLFLAG_RW,
	   &wq_timer_interval_msecs, 0, "");
workqueue_init_lock(proc_t p)
{
	lck_mtx_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr);
}

workqueue_destroy_lock(proc_t p)
{
	lck_mtx_destroy(&p->p_wqlock, pthread_lck_grp);
}

workqueue_lock(proc_t p)
{
	lck_mtx_lock(&p->p_wqlock);
}

workqueue_lock_spin(proc_t p)
{
	lck_mtx_lock_spin(&p->p_wqlock);
}

workqueue_unlock(proc_t p)
{
	lck_mtx_unlock(&p->p_wqlock);
}

workqueue_interval_timer_start(thread_call_t call, int interval_in_ms)
{
	uint64_t deadline;

	clock_interval_to_deadline(interval_in_ms, 1000 * 1000, &deadline);

	thread_call_enter_delayed(call, deadline);
}
workqueue_timer(struct workqueue *wq, __unused int param1)
	struct timeval tv, dtv;
	boolean_t added_more_threads = FALSE;
	boolean_t reset_maxactive = FALSE;
	boolean_t restart_timer = FALSE;

	KERNEL_DEBUG(0xefffd108, (int)wq, 0, 0, 0, 0);

	/*
	 * check to see if the stall frequency was beyond our tolerance
	 * or we have work on the queue, but haven't scheduled any
	 * new work within our acceptable time interval because
	 * there were no idle threads left to schedule
	 *
	 * WQ_TIMER_WATCH will only be set if we have 1 or more affinity
	 * groups that have stalled (no active threads and no idle threads)...
	 * it will not be set if all affinity groups have at least 1 thread
	 * that is currently runnable... if all processors have a runnable
	 * thread, there is no need to add more threads even if we're not
	 * scheduling new work within our allowed window... it just means
	 * that the work items are taking a long time to complete.
	 */
	if (wq->wq_flags & (WQ_ADD_TO_POOL | WQ_TIMER_WATCH)) {

		if (wq->wq_flags & WQ_ADD_TO_POOL)
			added_more_threads = TRUE;

		timersub(&tv, &wq->wq_lastran_ts, &dtv);

		if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_stalled_window_usecs)
			added_more_threads = TRUE;

		if (added_more_threads == TRUE) {
			for (i = 0; i < wq->wq_affinity_max && wq->wq_nthreads < WORKQUEUE_MAXTHREADS; i++) {
				(void)workqueue_addnewthread(wq);

	timersub(&tv, &wq->wq_reduce_ts, &dtv);

	if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_reduce_pool_window_usecs)
		reset_maxactive = TRUE;

	/*
	 * if the pool size has grown beyond the minimum number
	 * of threads needed to keep all of the processors busy, and
	 * the maximum number of threads scheduled concurrently during
	 * the last sample period didn't exceed half the current pool
	 * size, then it's time to trim the pool size back
	 */
	if (added_more_threads == FALSE &&
	    reset_maxactive == TRUE &&
	    wq->wq_nthreads > wq->wq_affinity_max &&
	    wq->wq_max_threads_scheduled <= (wq->wq_nthreads / 2)) {
		uint32_t nthreads_to_remove;

		if ((nthreads_to_remove = (wq->wq_nthreads / 4)) == 0)
			nthreads_to_remove = 1;

		for (i = 0; i < nthreads_to_remove && wq->wq_nthreads > wq->wq_affinity_max; i++)
			workqueue_removethread(wq);

	workqueue_lock_spin(wq->wq_proc);

	if (reset_maxactive == TRUE) {
		wq->wq_max_threads_scheduled = 0;
		microuptime(&wq->wq_reduce_ts);

	if (added_more_threads) {
		wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);

		/*
		 * since we added more threads, we should be
		 * able to run some work if it's still available
		 */
		workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
		workqueue_lock_spin(wq->wq_proc);

	if ((wq->wq_nthreads > wq->wq_affinity_max) ||
	    (wq->wq_flags & WQ_TIMER_WATCH)) {
		restart_timer = TRUE;
		wq->wq_flags &= ~WQ_TIMER_RUNNING;

	workqueue_unlock(wq->wq_proc);

	/*
	 * we needed to knock down the WQ_TIMER_RUNNING flag while behind
	 * the workqueue lock... however, we don't want to hold the lock
	 * while restarting the timer and we certainly don't want 2 or more
	 * instances of the timer... so set a local to indicate the need
	 * for a restart since the state of wq_flags may change once we
	 * drop the workqueue lock...
	 */
	if (restart_timer == TRUE)
		workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
	struct uthread *uth;
	struct threadlist *tl;
	struct workqueue *wq;

	uth = get_bsdthread_info(thread);
	tl = uth->uu_threadlist;

	case SCHED_CALL_BLOCK:
		uint32_t old_activecount;

		old_activecount = OSAddAtomic(-1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);

		if (old_activecount == 1 && wq->wq_itemcount) {
			/*
			 * we were the last active thread on this affinity set
			 * and we've got work to do
			 */
			workqueue_lock_spin(wq->wq_proc);
			/*
			 * if this thread is blocking (not parking)
			 * and the idle list is empty for this affinity group
			 * we'll count it as a 'stall'
			 */
			if ((tl->th_flags & TH_LIST_RUNNING) &&
			    TAILQ_EMPTY(&wq->wq_thidlelist[tl->th_affinity_tag]))
				wq->wq_stalled_count++;

			workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
			/*
			 * workqueue_run_nextitem will drop the workqueue
			 * lock before it returns
			 */

		KERNEL_DEBUG(0xefffd020, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);

	case SCHED_CALL_UNBLOCK:
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 */
		if (tl->th_unparked)
			OSAddAtomic(-1, (SInt32 *)&tl->th_unparked);
			OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);

		KERNEL_DEBUG(0xefffd024, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
workqueue_removethread(struct workqueue *wq)
	struct threadlist *tl;
	uint32_t i, affinity_tag = 0;

	workqueue_lock_spin(wq->wq_proc);

	for (i = 0; i < wq->wq_affinity_max; i++) {

		affinity_tag = wq->wq_nextaffinitytag;

		if (affinity_tag == 0)
			affinity_tag = wq->wq_affinity_max - 1;

		wq->wq_nextaffinitytag = affinity_tag;

		/*
		 * look for an idle thread to steal from this affinity group
		 * but don't grab the only thread associated with it
		 */
		if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]) && wq->wq_thcount[affinity_tag] > 1) {
			tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
			TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);

			wq->wq_thcount[affinity_tag]--;

	workqueue_unlock(wq->wq_proc);

	thread_sched_call(tl->th_thread, NULL);

	if ( (tl->th_flags & TH_LIST_BLOCKED) )
		/*
		 * thread was created, but never used...
		 * need to clean up the stack and port ourselves
		 * since we're not going to spin up through the
		 * normal exit path triggered from Libc
		 */
		(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
		(void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), (mach_port_name_t)tl->th_thport);

		thread_terminate(tl->th_thread);

	KERNEL_DEBUG(0xefffd030, (int)tl->th_thread, wq->wq_nthreads, tl->th_flags & TH_LIST_BLOCKED, 0, 0);
	/*
	 * drop our ref on the thread
	 */
	thread_deallocate(tl->th_thread);

	kfree(tl, sizeof(struct threadlist));
workqueue_addnewthread(struct workqueue *wq)
	struct threadlist *tl;
	struct uthread *uth;
	mach_vm_offset_t stackaddr;
	uint32_t affinity_tag;

	kret = thread_create(wq->wq_task, &th);

	if (kret != KERN_SUCCESS)

	tl = kalloc(sizeof(struct threadlist));
	bzero(tl, sizeof(struct threadlist));

#if defined(__ppc__)
	stackaddr = 0xF0000000;
#elif defined(__i386__)
	stackaddr = 0xB0000000;
#elif defined(__arm__)
	stackaddr = 0xB0000000;		/* XXX ARM */
#else
#error Need to define a stack address hint for this architecture
#endif
	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
				&stackaddr, tl->th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);

	if (kret == KERN_SUCCESS) {
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS)
			(void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);

	if (kret != KERN_SUCCESS) {
		(void) thread_terminate(th);

		kfree(tl, sizeof(struct threadlist));

	thread_reference(th);

	sright = (void *) convert_thread_to_port(th);
	tl->th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task));

	thread_static_param(th, TRUE);

	workqueue_lock_spin(p);

	affinity_tag = wq->wq_nextaffinitytag;
	wq->wq_nextaffinitytag = (affinity_tag + 1) % wq->wq_affinity_max;

	workqueue_unlock(p);

	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;

	tl->th_stackaddr = stackaddr;
	tl->th_affinity_tag = affinity_tag;

#if defined(__ppc__)
	//ml_fp_setvalid(FALSE);
	thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p));
#endif /* __ppc__ */
	/*
	 * affinity tag of 0 means no affinity...
	 * but we want our tags to be 0 based because they
	 * are used to index arrays, so...
	 * keep it 0 based internally and bump by 1 when
	 * calling out to set it
	 */
	(void)thread_affinity_set(th, affinity_tag + 1);
	thread_sched_call(th, workqueue_callback);

	uth = get_bsdthread_info(tl->th_thread);
	uth->uu_threadlist = (void *)tl;

	workqueue_lock_spin(p);

	TAILQ_INSERT_TAIL(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);

	wq->wq_thcount[affinity_tag]++;

	KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, (int)current_thread(), affinity_tag, wq->wq_nthreads, 0, (int)tl->th_thread);

	/*
	 * work may have come into the queue while
	 * no threads were available to run... since
	 * we're adding a new thread, go evaluate the
	 */
	workqueue_run_nextitem(p, THREAD_NULL);
	/*
	 * workqueue_run_nextitem is responsible for
	 * dropping the workqueue lock in all cases
	 */
workq_open(__unused struct proc *p, __unused struct workq_open_args *uap, __unused register_t *retval)
	struct workqueue * wq;
	struct workitem * witem;
	struct workitemlist *wl;

	if (p->p_wqptr == NULL) {
		num_cpus = ml_get_max_cpus();

		size = (sizeof(struct workqueue)) +
			(num_cpus * sizeof(int *)) +
			(num_cpus * sizeof(TAILQ_HEAD(, threadlist)));

		ptr = (char *)kalloc(size);

		wq = (struct workqueue *)ptr;
		wq->wq_flags = WQ_LIST_INITED;
		wq->wq_affinity_max = num_cpus;
		wq->wq_task = current_task();
		wq->wq_map  = current_map();

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wl = (struct workitemlist *)&wq->wq_list[i];
			TAILQ_INIT(&wl->wl_itemlist);
			TAILQ_INIT(&wl->wl_freelist);

			for (j = 0; j < WORKITEM_SIZE; j++) {
				witem = &wq->wq_array[(i*WORKITEM_SIZE) + j];
				TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);

		wq->wq_thactivecount = (uint32_t *)((char *)ptr + sizeof(struct workqueue));
		wq->wq_thcount = (uint32_t *)&wq->wq_thactivecount[wq->wq_affinity_max];
		wq->wq_thidlelist = (struct wq_thidlelist *)&wq->wq_thcount[wq->wq_affinity_max];
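
		/*
		 * Added note: the single kalloc'd block is carved up in place --
		 * the struct workqueue itself is followed by the per-affinity-group
		 * wq_thactivecount[] and wq_thcount[] arrays, and then by the
		 * per-group idle-thread TAILQ heads pointed at by wq_thidlelist,
		 * which is what the three pointer assignments above set up.
		 */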
		for (i = 0; i < wq->wq_affinity_max; i++)
			TAILQ_INIT(&wq->wq_thidlelist[i]);

		TAILQ_INIT(&wq->wq_thrunlist);

		p->p_wqptr = (void *)wq;

		workqueue_unlock(p);

		wq->wq_timer_call = thread_call_allocate((thread_call_func_t)workqueue_timer, (thread_call_param_t)wq);

		for (i = 0; i < wq->wq_affinity_max; i++) {
			(void)workqueue_addnewthread(wq);
		/* If unable to create any threads, return error */
		if (wq->wq_nthreads == 0)

		workqueue_lock_spin(p);

		microuptime(&wq->wq_reduce_ts);
		microuptime(&wq->wq_lastran_ts);
		wq->wq_max_threads_scheduled = 0;
		wq->wq_stalled_count = 0;

	workqueue_unlock(p);
workq_ops(struct proc *p, struct workq_ops_args *uap, __unused register_t *retval)
	int options = uap->options;
	int prio = uap->prio;	/* should be used to find the right workqueue */
	user_addr_t item = uap->item;
	thread_t th = THREAD_NULL;
	struct workqueue *wq;

	prio += 2;	/* normalize prio -2..+2 to 0..4 */

	case WQOPS_QUEUE_ADD: {

		KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, (int)item, 0, 0, 0, 0);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);

		error = workqueue_additem(wq, prio, item);

	case WQOPS_QUEUE_REMOVE: {

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);

		error = workqueue_removeitem(wq, prio, item);

	case WQOPS_THREAD_RETURN: {

		th = current_thread();

		KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, 0, 0, 0, 0, 0);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);

	workqueue_run_nextitem(p, th);
	/*
	 * workqueue_run_nextitem is responsible for
	 * dropping the workqueue lock in all cases
	 */
workqueue_exit(struct proc *p)
	struct workqueue * wq;
	struct threadlist * tl, *tlist;

	if (p->p_wqptr != NULL) {

		workqueue_lock_spin(p);

		wq = (struct workqueue *)p->p_wqptr;

		workqueue_unlock(p);

		if (wq->wq_flags & WQ_TIMER_RUNNING)
			thread_call_cancel(wq->wq_timer_call);
		thread_call_free(wq->wq_timer_call);

		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
			/*
			 * drop our last ref on the thread
			 */
			thread_sched_call(tl->th_thread, NULL);
			thread_deallocate(tl->th_thread);

			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
			kfree(tl, sizeof(struct threadlist));

		for (i = 0; i < wq->wq_affinity_max; i++) {
			TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist[i], th_entry, tlist) {
				/*
				 * drop our last ref on the thread
				 */
				thread_sched_call(tl->th_thread, NULL);
				thread_deallocate(tl->th_thread);

				TAILQ_REMOVE(&wq->wq_thidlelist[i], tl, th_entry);
				kfree(tl, sizeof(struct threadlist));

		kfree(wq, p->p_wqsize);
workqueue_additem(struct workqueue *wq, int prio, user_addr_t item)
	struct workitem *witem;
	struct workitemlist *wl;

	wl = (struct workitemlist *)&wq->wq_list[prio];

	if (TAILQ_EMPTY(&wl->wl_freelist))

	witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
	TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);

	witem->wi_item = item;
	TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);

	if (wq->wq_itemcount == 0) {
		microuptime(&wq->wq_lastran_ts);
		wq->wq_stalled_count = 0;
workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
	struct workitem *witem;
	struct workitemlist *wl;

	wl = (struct workitemlist *)&wq->wq_list[prio];

	TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
		if (witem->wi_item == item) {
			TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);

			witem->wi_item = (user_addr_t)0;
			TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

	if (wq->wq_itemcount == 0)
		wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);
/*
 * workqueue_run_nextitem:
 *   called with the workqueue lock held...
 *   responsible for dropping it in all cases
 */
workqueue_run_nextitem(proc_t p, thread_t thread)
	struct workqueue *wq;
	struct workitem *witem = NULL;
	user_addr_t item = 0;
	thread_t th_to_run = THREAD_NULL;
	thread_t th_to_park = THREAD_NULL;
	int wake_thread = 0;
	int reuse_thread = 1;
	uint32_t stalled_affinity_count = 0;
	uint32_t affinity_tag;
	struct threadlist *tl = NULL;
	struct uthread *uth = NULL;
	struct workitemlist *wl;
	boolean_t start_timer = FALSE;
	struct timeval tv, lat_tv;

	wq = (struct workqueue *)p->p_wqptr;

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, (int)thread, wq->wq_threads_scheduled, wq->wq_stalled_count, 0, 0);

	if (wq->wq_itemcount == 0) {
		if ((th_to_park = thread) == THREAD_NULL)

	if (thread != THREAD_NULL) {
		/*
		 * we're a worker thread from the pool... currently we
		 * are considered 'active' which means we're counted
		 * in "wq_thactivecount"
		 */
		uth = get_bsdthread_info(thread);
		tl = uth->uu_threadlist;

		if (wq->wq_thactivecount[tl->th_affinity_tag] == 1) {
			/*
			 * we're the only active thread associated with our
			 * affinity group, so pick up some work and keep going
			 */

	for (affinity_tag = 0; affinity_tag < wq->wq_affinity_max; affinity_tag++) {
		/*
		 * look for first affinity group that is currently not active
		 * and has at least 1 idle thread
		 */
		if (wq->wq_thactivecount[affinity_tag] == 0) {
			if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]))

			stalled_affinity_count++;

	if (thread == THREAD_NULL) {
		/*
		 * we're not one of the 'worker' threads
		 */
		if (affinity_tag >= wq->wq_affinity_max) {
			/*
			 * we've already got at least 1 thread per
			 * affinity group in the active state... or
			 * we've got no idle threads to play with
			 */
			if (stalled_affinity_count) {

				if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
					wq->wq_flags |= WQ_TIMER_RUNNING;

				wq->wq_flags |= WQ_TIMER_WATCH;

		/*
		 * we're overbooked on the affinity group we're associated with,
		 * so park this thread
		 */
		th_to_park = thread;

		if (affinity_tag >= wq->wq_affinity_max) {
			/*
			 * all the affinity groups have active threads
			 * running, or there are no idle threads to
			 */
			if (stalled_affinity_count) {

				if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
					wq->wq_flags |= WQ_TIMER_RUNNING;

				wq->wq_flags |= WQ_TIMER_WATCH;

		/*
		 * we've got a candidate (affinity group with no currently
		 * active threads) to start a new thread on...
		 * we already know there is both work available
		 * and an idle thread with the correct affinity tag, so
		 * fall into the code that pulls a new thread and workitem...
		 * once we've kicked that thread off, we'll park this one
		 */
	tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
	TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);

	th_to_run = tl->th_thread;
	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
		tl->th_flags &= ~TH_LIST_SUSPENDED;
	} else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
		tl->th_flags &= ~TH_LIST_BLOCKED;

	tl->th_flags |= TH_LIST_RUNNING;

	wq->wq_threads_scheduled++;

	if (wq->wq_threads_scheduled > wq->wq_max_threads_scheduled)
		wq->wq_max_threads_scheduled = wq->wq_threads_scheduled;

	for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
		wl = (struct workitemlist *)&wq->wq_list[i];

		if (!(TAILQ_EMPTY(&wl->wl_itemlist))) {

			witem = TAILQ_FIRST(&wl->wl_itemlist);
			TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);

			item = witem->wi_item;
			witem->wi_item = (user_addr_t)0;
			TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

		panic("workq_run_nextitem: NULL witem");

	if (thread != th_to_run) {
		/*
		 * we're starting up a thread from a parked/suspended condition
		 */
		OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
		OSAddAtomic(1, (SInt32 *)&tl->th_unparked);

	if (wq->wq_itemcount == 0)
		wq->wq_flags &= ~WQ_TIMER_WATCH;

	/*
	 * if we had any affinity groups stall (no threads runnable)
	 * since we last scheduled an item... and
	 * the elapsed time since we last scheduled an item
	 * exceeds the latency tolerance...
	 * we ask the timer thread (which should already be running)
	 * to add some more threads to the pool
	 */
	if (wq->wq_stalled_count && !(wq->wq_flags & WQ_ADD_TO_POOL)) {
		timersub(&tv, &wq->wq_lastran_ts, &lat_tv);

		if (((lat_tv.tv_sec * 1000000) + lat_tv.tv_usec) > wq_max_run_latency_usecs)
			wq->wq_flags |= WQ_ADD_TO_POOL;

		KERNEL_DEBUG(0xefffd10c, wq->wq_stalled_count, lat_tv.tv_sec, lat_tv.tv_usec, wq->wq_flags, 0);

	wq->wq_lastran_ts = tv;

	wq->wq_stalled_count = 0;
	workqueue_unlock(p);

	KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[0], wq->wq_thactivecount[1],
		     wq->wq_thactivecount[2], wq->wq_thactivecount[3], 0);

	KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[4], wq->wq_thactivecount[5],
		     wq->wq_thactivecount[6], wq->wq_thactivecount[7], 0);

	/*
	 * if current thread is reused for workitem, does not return via unix_syscall
	 */
	wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));

	if (th_to_park == THREAD_NULL) {

		KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, (int)item, wq->wq_flags, 1, 0);

	workqueue_lock_spin(p);

	wq->wq_threads_scheduled--;
	/*
	 * this is a workqueue thread with no more
	 * work to do... park it for now
	 */
	uth = get_bsdthread_info(th_to_park);
	tl = uth->uu_threadlist;
		panic("wq thread with no threadlist ");

	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
	tl->th_flags &= ~TH_LIST_RUNNING;

	tl->th_flags |= TH_LIST_BLOCKED;
	TAILQ_INSERT_HEAD(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);

	assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));

	workqueue_unlock(p);

		workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);

	KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, (int)current_thread(), wq->wq_threads_scheduled, 0, 0, (int)th_to_park);

	thread_block((thread_continue_t)thread_exception_return);

	panic("unexpected return from thread_block");

	workqueue_unlock(p);

		workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, 0, wq->wq_flags, 2, 0);
wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
	   int reuse_thread, int wake_thread, int return_directly)

	KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, (int)current_thread(), (int)item, wake_thread, tl->th_affinity_tag, (int)th);

	ret = setup_wqthread(p, th, item, reuse_thread, tl);

		panic("setup_wqthread failed %x\n", ret);

	if (return_directly) {
		thread_exception_return();

		panic("wq_runitem: thread_exception_return returned ...\n");

	KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);

	KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);
setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
#if defined(__ppc__)
	/*
	 * Set up PowerPC registers...
	 * internally they are always kept as 64 bit and
	 * since the register set is the same between 32 and 64bit modes
	 * we don't need 2 different methods for setting the state
	 */
	ppc_thread_state64_t state64;
	ppc_thread_state64_t *ts64 = &state64;

	ts64->srr0 = (uint64_t)p->p_wqthread;
	ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE);
	ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
	ts64->r4 = (uint64_t)((unsigned int)tl->th_thport);
	ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
	ts64->r6 = (uint64_t)item;
	ts64->r7 = (uint64_t)reuse_thread;
	ts64->r8 = (uint64_t)0;

	thread_set_wq_state64(th, (thread_state_t)ts64);

#elif defined(__i386__)
	isLP64 = IS_64BIT_PROCESS(p);
	/*
	 * Set up i386 registers & function call.
	 */
	x86_thread_state32_t state;
	x86_thread_state32_t *ts = &state;

	ts->eip = (int)p->p_wqthread;
	ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
	ts->ebx = (unsigned int)tl->th_thport;
	ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
	ts->edx = (unsigned int)item;
	ts->edi = (unsigned int)reuse_thread;
	ts->esi = (unsigned int)0;

	ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));

	thread_set_wq_state32(th, (thread_state_t)ts);
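
	/*
	 * Added note (assumption about the user-side convention, not stated in
	 * this file): the registers initialized above are what the user-space
	 * workqueue entry point registered via bsdthread_register (p->p_wqthread)
	 * sees on entry -- roughly (stack top / self, thread port, address just
	 * above the guard page, work item, reuse flag); the 64-bit and ARM cases
	 * below set up the equivalent state for their calling conventions.
	 */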
	x86_thread_state64_t state64;
	x86_thread_state64_t *ts64 = &state64;

	ts64->rip = (uint64_t)p->p_wqthread;
	ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
	ts64->rsi = (uint64_t)((unsigned int)(tl->th_thport));
	ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
	ts64->rcx = (uint64_t)item;
	ts64->r8 = (uint64_t)reuse_thread;
	ts64->r9 = (uint64_t)0;
	/*
	 * set stack pointer aligned to 16 byte boundary
	 */
	ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);

	thread_set_wq_state64(th, (thread_state_t)ts64);

#elif defined(__arm__)
	arm_thread_state_t state;
	arm_thread_state_t *ts = &state;

	/* XXX ARM add more */
	ts->pc = p->p_wqthread;
	ts->sp = tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE;

	thread_set_wq_state32(th, (thread_state_t)ts);

#else
#error setup_wqthread not defined for this architecture
#endif