/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 *	pthread_synch.c
 */

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
#include <sys/sysproto.h>
#include <sys/pthread_internal.h>
#include <sys/vm.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */


#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/affinity.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
#include <vm/vm_protos.h>
#include <vm/vm_map.h>		/* for current_map() */
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#if defined(__i386__)
#include <i386/machine_routines.h>
#include <i386/eflags.h>
#include <i386/psl.h>
#include <i386/seg.h>
#endif

#include <libkern/OSAtomic.h>

#if 0
#undef KERNEL_DEBUG
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#undef KERNEL_DEBUG1
#define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1
#endif


#if defined(__ppc__) || defined(__ppc64__)
#include <architecture/ppc/cframe.h>
#endif


lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;

extern kern_return_t thread_getstatus(register thread_t act, int flavor,
			thread_state_t tstate, mach_msg_type_number_t *count);
extern kern_return_t thread_setstatus(thread_t thread, int flavor,
			thread_state_t tstate, mach_msg_type_number_t count);
extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);

extern void workqueue_thread_yielded(void);

static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity);
static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item);
static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t th,
					user_addr_t oc_item, int oc_prio, int oc_affinity);
static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
		       int reuse_thread, int wake_thread, int return_directly);
static void wq_unpark_continue(void);
static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
static boolean_t workqueue_addnewthread(struct workqueue *wq);
static void workqueue_removethread(struct threadlist *tl);
static void workqueue_lock_spin(proc_t);
static void workqueue_unlock(proc_t);
int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);

#define WQ_MAXPRI_MIN	0	/* low prio queue num */
#define WQ_MAXPRI_MAX	2	/* max prio queuenum */
#define WQ_PRI_NUM	3	/* number of prio work queues */

#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128
#define TRUNC_DOWN32(a,c)	((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
#define TRUNC_DOWN64(a,c)	((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))


/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);

#define PTHREAD_START_CUSTOM		0x01000000
#define PTHREAD_START_SETSCHED		0x02000000
#define PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff

#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR


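/*
 * bsdthread_create: kernel half of pthread_create().  Creates a Mach
 * thread in the calling task, optionally allocates its stack (guard page
 * at the lowest address, stack base at the highest), seeds the register
 * state so the thread starts at the user-space entry point registered via
 * bsdthread_register (p->p_threadstart), and applies any scheduling policy
 * encoded in the flags word.
 *
 * Illustrative packing example (not from the original source): a caller
 * requesting a timeshare thread would build the flags word roughly as
 *   flags = PTHREAD_START_SETSCHED |
 *           (SCHED_OTHER << PTHREAD_START_POLICY_BITSHIFT) |
 *           (importance & PTHREAD_START_IMPORTANCE_MASK);
 * which is unpacked below with the same shift and masks.
 */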
int
bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval)
{
	kern_return_t kret;
	void * sright;
	int error = 0;
	int allocated = 0;
	mach_vm_offset_t stackaddr;
	mach_vm_size_t th_allocsize = 0;
	mach_vm_size_t user_stacksize;
	mach_vm_size_t th_stacksize;
	mach_vm_offset_t th_stackaddr;
	mach_vm_offset_t th_stack;
	mach_vm_offset_t th_pthread;
	mach_port_name_t th_thport;
	thread_t th;
	user_addr_t user_func = uap->func;
	user_addr_t user_funcarg = uap->func_arg;
	user_addr_t user_stack = uap->stack;
	user_addr_t user_pthread = uap->pthread;
	unsigned int flags = (unsigned int)uap->flags;
	vm_map_t vmap = current_map();
	task_t ctask = current_task();
	unsigned int policy, importance;

	int isLP64 = 0;


	if ((p->p_lflag & P_LREGISTER) == 0)
		return(EINVAL);
#if 0
	KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0);
#endif

	isLP64 = IS_64BIT_PROCESS(p);


#if defined(__ppc__)
	stackaddr = 0xF0000000;
#elif defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif
	kret = thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	thread_reference(th);

	sright = (void *) convert_thread_to_port(th);
	th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(ctask));

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		th_stacksize = (mach_vm_size_t)user_stack;	/* if it is custom then it is stacksize */
		th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

		kret = mach_vm_map(vmap, &stackaddr,
				th_allocsize,
				page_size-1,
				VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
				VM_INHERIT_DEFAULT);
		if (kret != KERN_SUCCESS)
			kret = mach_vm_allocate(vmap,
					&stackaddr, th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
		if (kret != KERN_SUCCESS) {
			error = ENOMEM;
			goto out;
		}
#if 0
		KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
#endif
		th_stackaddr = stackaddr;
		allocated = 1;
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(vmap, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS) {
			error = ENOMEM;
			goto out1;
		}
		th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		user_stacksize = th_stacksize;
	} else {
		th_stack = user_stack;
		user_stacksize = user_stack;
		th_pthread = user_pthread;
#if 0
		KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, 0, 0, 0, 3, 0);
#endif
	}

#if defined(__ppc__)
	/*
	 * Set up PowerPC registers...
	 * internally they are always kept as 64 bit and
	 * since the register set is the same between 32 and 64bit modes
	 * we don't need 2 different methods for setting the state
	 */
	{
		ppc_thread_state64_t state64;
		ppc_thread_state64_t *ts64 = &state64;

		ts64->srr0 = (uint64_t)p->p_threadstart;
		ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE);
		ts64->r3 = (uint64_t)th_pthread;
		ts64->r4 = (uint64_t)(th_thport);
		ts64->r5 = (uint64_t)user_func;
		ts64->r6 = (uint64_t)user_funcarg;
		ts64->r7 = (uint64_t)user_stacksize;
		ts64->r8 = (uint64_t)uap->flags;

		thread_set_wq_state64(th, (thread_state_t)ts64);

		thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64);
	}
#elif defined(__i386__) || defined(__x86_64__)
	{
	/*
	 * Set up i386 registers & function call.
	 */
	if (isLP64 == 0) {
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (int)p->p_threadstart;
		ts->eax = (unsigned int)th_pthread;
		ts->ebx = (unsigned int)th_thport;
		ts->ecx = (unsigned int)user_func;
		ts->edx = (unsigned int)user_funcarg;
		ts->edi = (unsigned int)user_stacksize;
		ts->esi = (unsigned int)uap->flags;
		/*
		 * set stack pointer
		 */
		ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));

		thread_set_wq_state32(th, (thread_state_t)ts);

	} else {
		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)p->p_threadstart;
		ts64->rdi = (uint64_t)th_pthread;
		ts64->rsi = (uint64_t)(th_thport);
		ts64->rdx = (uint64_t)user_func;
		ts64->rcx = (uint64_t)user_funcarg;
		ts64->r8 = (uint64_t)user_stacksize;
		ts64->r9 = (uint64_t)uap->flags;
		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);

		thread_set_wq_state64(th, (thread_state_t)ts64);
	}
	}
#else
#error bsdthread_create not defined for this architecture
#endif
	/* Set scheduling parameters if needed */
	if ((flags & PTHREAD_START_SETSCHED) != 0) {
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER)
			extinfo.timeshare = 1;
		else
			extinfo.timeshare = 0;
		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

#define BASEPRI_DEFAULT 31
		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	}

	kret = thread_resume(th);
	if (kret != KERN_SUCCESS) {
		error = EINVAL;
		goto out1;
	}
	thread_deallocate(th);	/* drop the creator reference */
#if 0
	KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, th_pthread, 0, 0, 0);
#endif
	*retval = th_pthread;

	return(0);

out1:
	if (allocated != 0)
		(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
out:
	(void)mach_port_deallocate(get_task_ipcspace(ctask), th_thport);
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}

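/*
 * bsdthread_terminate: kernel half of pthread teardown.  Frees the user
 * stack region if one was passed in, terminates the calling thread,
 * signals the optional semaphore handed in by user space, releases the
 * thread port, and returns to user space via thread_exception_return().
 */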
int
bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	mach_port_name_t kthport = (mach_port_name_t)uap->port;
	mach_port_name_t sem = (mach_port_name_t)uap->sem;

	freeaddr = (mach_vm_offset_t)uap->stackaddr;
	freesize = uap->freesize;

#if 0
	KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);
#endif
	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
		if (kret != KERN_SUCCESS) {
			return(EINVAL);
		}
	}

	(void) thread_terminate(current_thread());
	if (sem != MACH_PORT_NULL) {
		kret = semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			return(EINVAL);
		}
	}

	if (kthport != MACH_PORT_NULL)
		mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
	thread_exception_return();
	panic("bsdthread_terminate: still running\n");
#if 0
	KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_END, 0, 0, 0, 0xff, 0);
#endif
	return(0);
}


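/*
 * bsdthread_register: one-time per-process registration of the user-space
 * entry points and layout the kernel needs when it hand-crafts threads for
 * this process: the thread start routine, the workqueue thread entry, the
 * pthread structure size, the target-concurrency pointer, and the dispatch
 * queue offset.
 */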
int
bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused int32_t *retval)
{
	/* prevent multiple registrations */
	if ((p->p_lflag & P_LREGISTER) != 0)
		return(EINVAL);
	/* syscall randomizer test can pass bogus values */
	if (uap->pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	p->p_threadstart = uap->threadstart;
	p->p_wqthread = uap->wqthread;
	p->p_pthsize = uap->pthsize;
	p->p_targconc = uap->targetconc_ptr;
	p->p_dispatchqueue_offset = uap->dispatchqueue_offset;
	proc_setregister(p);

	return(0);
}


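/*
 * Workqueue tunables.  The compile-time defaults (the WQ_* and
 * WORKQUEUE_MAXTHREADS macros) are exported below as read-write
 * kern.wq_* sysctls so they can be adjusted at run time.
 */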
uint32_t wq_yielded_threshold		= WQ_YIELDED_THRESHOLD;
uint32_t wq_yielded_window_usecs	= WQ_YIELDED_WINDOW_USECS;
uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
uint32_t wq_max_threads			= WORKQUEUE_MAXTHREADS;


SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW,
	   &wq_yielded_threshold, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW,
	   &wq_yielded_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW,
	   &wq_max_timer_interval_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW,
	   &wq_max_threads, 0, "");


void
workqueue_init_lock(proc_t p)
{
	lck_spin_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr);

	p->p_wqiniting = FALSE;
}

void
workqueue_destroy_lock(proc_t p)
{
	lck_spin_destroy(&p->p_wqlock, pthread_lck_grp);
}


static void
workqueue_lock_spin(proc_t p)
{
	lck_spin_lock(&p->p_wqlock);
}

static void
workqueue_unlock(proc_t p)
{
	lck_spin_unlock(&p->p_wqlock);
}


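/*
 * workqueue_interval_timer_start: arm the "add thread" timer.  The
 * interval starts at wq_stalled_window_usecs and doubles each time the
 * timer has to be re-armed, capped at wq_max_timer_interval_usecs, so a
 * persistently stalled queue is re-checked with exponential backoff.
 */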
static void
workqueue_interval_timer_start(struct workqueue *wq)
{
	uint64_t deadline;

	if (wq->wq_timer_interval == 0)
		wq->wq_timer_interval = wq_stalled_window_usecs;
	else {
		wq->wq_timer_interval = wq->wq_timer_interval * 2;

		if (wq->wq_timer_interval > wq_max_timer_interval_usecs)
			wq->wq_timer_interval = wq_max_timer_interval_usecs;
	}
	clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);

	thread_call_enter_delayed(wq->wq_atimer_call, deadline);

	KERNEL_DEBUG(0xefffd110, wq, wq->wq_itemcount, wq->wq_flags, wq->wq_timer_interval, 0);
}


static boolean_t
wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
{	clock_sec_t	secs;
	clock_usec_t	usecs;
	uint64_t lastblocked_ts;
	uint64_t elapsed;

	/*
	 * the timestamp is updated atomically w/o holding the workqueue lock
	 * so we need to do an atomic read of the 64 bits so that we don't see
	 * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
	 * independent fashion by using OSCompareAndSwap64 to write back the
	 * value we grabbed... if it succeeds, then we have a good timestamp to
	 * evaluate... if it fails, we straddled grabbing the timestamp while it
	 * was being updated... treat a failed update as a busy thread since
	 * it implies we are about to see a really fresh timestamp anyway
	 */
	lastblocked_ts = *lastblocked_tsp;

#if defined(__ppc__)
#else
	if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
		return (TRUE);
#endif
	if (lastblocked_ts >= cur_ts) {
		/*
		 * because the update of the timestamp when a thread blocks isn't
		 * serialized against us looking at it (i.e. we don't hold the workq lock)
		 * it's possible to have a timestamp that matches the current time or
		 * that even looks to be in the future relative to when we grabbed the current
		 * time... just treat this as a busy thread since it must have just blocked.
		 */
		return (TRUE);
	}
	elapsed = cur_ts - lastblocked_ts;

	absolutetime_to_microtime(elapsed, &secs, &usecs);

	if (secs == 0 && usecs < wq_stalled_window_usecs)
		return (TRUE);
	return (FALSE);
}


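/*
 * WQ_TIMER_NEEDED: request that the add-thread timer be armed.
 * WQ_ATIMER_RUNNING is set with a compare-and-swap because the flag word
 * is also updated from the scheduler callback without the workq lock;
 * start_timer comes back TRUE only for the caller that actually set the
 * flag, and that caller is expected to invoke
 * workqueue_interval_timer_start() once it is safe to do so.
 */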
#define WQ_TIMER_NEEDED(wq, start_timer) do {		\
	int oldflags = wq->wq_flags;			\
							\
	if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) {	\
		if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
			start_timer = TRUE;		\
	}						\
} while (0)



static void
workqueue_add_timer(struct workqueue *wq, __unused int param1)
{
	proc_t p;
	boolean_t start_timer = FALSE;
	boolean_t retval;
	boolean_t add_thread;
	uint32_t busycount;

	KERNEL_DEBUG(0xefffd108 | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);

	p = wq->wq_proc;

	workqueue_lock_spin(p);

	/*
	 * because workqueue_callback now runs w/o taking the workqueue lock
	 * we are unsynchronized w/r to a change in state of the running threads...
	 * to make sure we always evaluate that change, we allow it to start up
	 * a new timer if the current one is actively evaluating the state
	 * however, we do not need more than 2 timers fired up (1 active and 1 pending)
	 * and we certainly do not want 2 active timers evaluating the state
	 * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
	 * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
	 * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
	 * and set atomically since the callback function needs to manipulate it
	 * w/o holding the workq lock...
	 *
	 * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY   ==   no pending timer, no active timer
	 * !WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY    ==   no pending timer, 1 active timer
	 * WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY    ==   1 pending timer, no active timer
	 * WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY     ==   1 pending timer, 1 active timer
	 */
	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
		wq->wq_lflags |= WQL_ATIMER_WAITING;

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(p);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(p);
	}
	wq->wq_lflags |= WQL_ATIMER_BUSY;

	/*
	 * the workq lock will protect us from seeing WQ_EXITING change state, but we
	 * still need to update this atomically in case someone else tries to start
	 * the timer just as we're releasing it
	 */
	while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));

again:
	retval = TRUE;
	add_thread = FALSE;

	if ( !(wq->wq_flags & WQ_EXITING)) {
		/*
		 * check to see if the stall frequency was beyond our tolerance
		 * or we have work on the queue, but haven't scheduled any
		 * new work within our acceptable time interval because
		 * there were no idle threads left to schedule
		 */
		if (wq->wq_itemcount) {
			uint32_t	priority;
			uint32_t	affinity_tag;
			uint32_t	i;
			uint64_t	curtime;

			for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
				if (wq->wq_list_bitmap & (1 << priority))
					break;
			}
			assert(priority < WORKQUEUE_NUMPRIOS);

			curtime = mach_absolute_time();
			busycount = 0;

			for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
				/*
				 * if we have no idle threads, we can try to add them if needed
				 */
				if (wq->wq_thidlecount == 0)
					add_thread = TRUE;

				/*
				 * look for first affinity group that is currently not active
				 * i.e. no active threads at this priority level or higher
				 * and has not been active recently at this priority level or higher
				 */
				for (i = 0; i <= priority; i++) {
					if (wq->wq_thactive_count[i][affinity_tag]) {
						add_thread = FALSE;
						break;
					}
					if (wq->wq_thscheduled_count[i][affinity_tag]) {
						if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {
							add_thread = FALSE;
							busycount++;
							break;
						}
					}
				}
				if (add_thread == TRUE) {
					retval = workqueue_addnewthread(wq);
					break;
				}
			}
			if (wq->wq_itemcount) {
				/*
				 * as long as we have threads to schedule, and we successfully
				 * scheduled new work, keep trying
				 */
				while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
					/*
					 * workqueue_run_nextitem is responsible for
					 * dropping the workqueue lock in all cases
					 */
					retval = workqueue_run_nextitem(p, wq, THREAD_NULL, 0, 0, 0);
					workqueue_lock_spin(p);

					if (retval == FALSE)
						break;
				}
				if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_itemcount) {

					if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
						goto again;

					if (wq->wq_thidlecount == 0 || busycount)
						WQ_TIMER_NEEDED(wq, start_timer);

					KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_itemcount, wq->wq_thidlecount, busycount, 0);
				}
			}
		}
	}
	if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
		wq->wq_timer_interval = 0;

	wq->wq_lflags &= ~WQL_ATIMER_BUSY;

	if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
		/*
		 * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer
		 * to finish getting out of the way
		 */
		wq->wq_lflags &= ~WQL_ATIMER_WAITING;
		wakeup(wq);
	}
	KERNEL_DEBUG(0xefffd108 | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_unlock(p);

	if (start_timer == TRUE)
		workqueue_interval_timer_start(wq);
}


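/*
 * workqueue_thread_yielded: called when a workqueue thread voluntarily
 * yields.  Once wq_yielded_threshold yields have accumulated within
 * wq_yielded_window_usecs and there is still queued work, the kernel
 * pulls the next work item and dispatches it to an idle thread (adding
 * one first if necessary) rather than waiting for the yielding thread
 * to block.
 */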
void
workqueue_thread_yielded(void)
{
	struct workqueue *wq;
	proc_t p;

	p = current_proc();

	if ((wq = p->p_wqptr) == NULL || wq->wq_itemcount == 0)
		return;

	workqueue_lock_spin(p);

	if (wq->wq_itemcount) {
		uint64_t	curtime;
		uint64_t	elapsed;
		clock_sec_t	secs;
		clock_usec_t	usecs;

		if (wq->wq_thread_yielded_count++ == 0)
			wq->wq_thread_yielded_timestamp = mach_absolute_time();

		if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
			workqueue_unlock(p);
			return;
		}
		KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 0, 0);

		wq->wq_thread_yielded_count = 0;

		curtime = mach_absolute_time();
		elapsed = curtime - wq->wq_thread_yielded_timestamp;
		absolutetime_to_microtime(elapsed, &secs, &usecs);

		if (secs == 0 && usecs < wq_yielded_window_usecs) {

			if (wq->wq_thidlecount == 0) {
				workqueue_addnewthread(wq);
				/*
				 * 'workqueue_addnewthread' drops the workqueue lock
				 * when creating the new thread and then retakes it before
				 * returning... this window allows other threads to process
				 * work on the queue, so we need to recheck for available work
				 * if none found, we just return... the newly created thread
				 * will eventually get used (if it hasn't already)...
				 */
				if (wq->wq_itemcount == 0) {
					workqueue_unlock(p);
					return;
				}
			}
			if (wq->wq_thidlecount) {
				uint32_t	priority;
				uint32_t	affinity = -1;
				user_addr_t	item;
				struct workitem *witem = NULL;
				struct workitemlist *wl = NULL;
				struct uthread *uth;
				struct threadlist *tl;

				uth = get_bsdthread_info(current_thread());
				if ((tl = uth->uu_threadlist))
					affinity = tl->th_affinity_tag;

				for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
					if (wq->wq_list_bitmap & (1 << priority)) {
						wl = (struct workitemlist *)&wq->wq_list[priority];
						break;
					}
				}
				assert(wl != NULL);
				assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));

				witem = TAILQ_FIRST(&wl->wl_itemlist);
				TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);

				if (TAILQ_EMPTY(&wl->wl_itemlist))
					wq->wq_list_bitmap &= ~(1 << priority);
				wq->wq_itemcount--;

				item = witem->wi_item;
				witem->wi_item = (user_addr_t)0;
				witem->wi_affinity = 0;

				TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

				(void)workqueue_run_nextitem(p, wq, THREAD_NULL, item, priority, affinity);
				/*
				 * workqueue_run_nextitem is responsible for
				 * dropping the workqueue lock in all cases
				 */
				KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 1, 0);

				return;
			}
		}
		KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 2, 0);
	}
	workqueue_unlock(p);
}



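/*
 * workqueue_callback: scheduler callout registered on each workqueue
 * thread.  On SCHED_CALL_BLOCK it decrements the active count for the
 * thread's priority/affinity bucket, records a last-blocked timestamp,
 * and may request the add-thread timer if work is still queued; on
 * SCHED_CALL_UNBLOCK it re-increments the active count (unless the
 * thread was still marked suspended).  Runs without the workq lock,
 * since it can be invoked from interrupt context.
 */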
static void
workqueue_callback(int type, thread_t thread)
{
	struct uthread *uth;
	struct threadlist *tl;
	struct workqueue *wq;

	uth = get_bsdthread_info(thread);
	tl = uth->uu_threadlist;
	wq = tl->th_workq;

	switch (type) {

	case SCHED_CALL_BLOCK:
	{
		uint32_t	old_activecount;

		old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);

		if (old_activecount == 1) {
			boolean_t	start_timer = FALSE;
			uint64_t	curtime;
			UInt64		*lastblocked_ptr;

			/*
			 * we were the last active thread on this affinity set
			 * and we've got work to do
			 */
			lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority][tl->th_affinity_tag];
			curtime = mach_absolute_time();

			/*
			 * if we collide with another thread trying to update the last_blocked (really unlikely
			 * since another thread would have to get scheduled and then block after we start down
			 * this path), it's not a problem.  Either timestamp is adequate, so no need to retry
			 */
#if defined(__ppc__)
			/*
			 * this doesn't have to actually work reliably for PPC, it just has to compile/link
			 */
			*lastblocked_ptr = (UInt64)curtime;
#else
			OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
#endif
			if (wq->wq_itemcount)
				WQ_TIMER_NEEDED(wq, start_timer);

			if (start_timer == TRUE)
				workqueue_interval_timer_start(wq);
		}
		KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_START, wq, old_activecount, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
	}
	break;

	case SCHED_CALL_UNBLOCK:
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 * is also held
		 */
		if (tl->th_suspended) {
			OSAddAtomic(-1, &tl->th_suspended);
			KERNEL_DEBUG1(0xefffd024, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
		} else {
			OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);

			KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
		}
		break;
	}
}


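/*
 * workqueue_removethread: called with the workqueue lock held.  Pulls an
 * idle thread off wq_thidlelist, clears its sched callout and uthread
 * back-pointer, then (after dropping the lock) releases its thread
 * reference and frees the threadlist entry.  A thread that was created
 * but never used (still TH_LIST_SUSPENDED) also has its user stack and
 * port torn down here, since it will not exit through the normal Libc
 * path.
 */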
static void
workqueue_removethread(struct threadlist *tl)
{
	struct workqueue *wq;
	struct uthread * uth;

	wq = tl->th_workq;

	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);

	wq->wq_nthreads--;
	wq->wq_thidlecount--;

	/*
	 * Clear the threadlist pointer in uthread so
	 * blocked thread on wakeup for termination will
	 * not access the thread list as it is going to be
	 * freed.
	 */
	thread_sched_call(tl->th_thread, NULL);

	uth = get_bsdthread_info(tl->th_thread);
	if (uth != (struct uthread *)0) {
		uth->uu_threadlist = NULL;
	}
	workqueue_unlock(wq->wq_proc);

	if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
		/*
		 * thread was created, but never used...
		 * need to clean up the stack and port ourselves
		 * since we're not going to spin up through the
		 * normal exit path triggered from Libc
		 */
		(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
		(void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), tl->th_thport);

		KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
	} else {

		KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
	}
	/*
	 * drop our ref on the thread
	 */
	thread_deallocate(tl->th_thread);

	kfree(tl, sizeof(struct threadlist));
}


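/*
 * workqueue_addnewthread: grow the thread pool by one.  Called with the
 * workqueue lock held; it drops the lock to create the kernel thread,
 * map its stack (guard page at the lowest address), and wire up the
 * threadlist entry, then retakes the lock and parks the new thread on
 * wq_thidlelist as TH_LIST_SUSPENDED until it is handed its first work
 * item.
 */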
static boolean_t
workqueue_addnewthread(struct workqueue *wq)
{
	struct threadlist *tl;
	struct uthread	*uth;
	kern_return_t	kret;
	thread_t	th;
	proc_t		p;
	void		*sright;
	mach_vm_offset_t stackaddr;

	if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20))
		return (FALSE);
	wq->wq_nthreads++;

	p = wq->wq_proc;
	workqueue_unlock(p);

	kret = thread_create_workq(wq->wq_task, &th);

	if (kret != KERN_SUCCESS)
		goto failed;

	tl = kalloc(sizeof(struct threadlist));
	bzero(tl, sizeof(struct threadlist));

#if defined(__ppc__)
	stackaddr = 0xF0000000;
#elif defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif
	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			tl->th_allocsize,
			page_size-1,
			VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
				&stackaddr, tl->th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}
	if (kret == KERN_SUCCESS) {
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS)
			(void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
	}
	if (kret != KERN_SUCCESS) {
		(void) thread_terminate(th);

		kfree(tl, sizeof(struct threadlist));
		goto failed;
	}
	thread_reference(th);

	sright = (void *) convert_thread_to_port(th);
	tl->th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task));

	thread_static_param(th, TRUE);

	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;

	tl->th_thread = th;
	tl->th_workq = wq;
	tl->th_stackaddr = stackaddr;
	tl->th_affinity_tag = -1;
	tl->th_priority = WORKQUEUE_NUMPRIOS;
	tl->th_policy = -1;
	tl->th_suspended = 1;

#if defined(__ppc__)
	//ml_fp_setvalid(FALSE);
	thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p));
#endif /* __ppc__ */

	uth = get_bsdthread_info(tl->th_thread);
	uth->uu_threadlist = (void *)tl;

	workqueue_lock_spin(p);

	TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);

	wq->wq_thidlecount++;

	KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread));

	return (TRUE);

failed:
	workqueue_lock_spin(p);
	wq->wq_nthreads--;

	return (FALSE);
}


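/*
 * workq_open: lazily allocates the per-process struct workqueue on the
 * first workq syscall.  The single kalloc'd block also carries the
 * per-priority, per-CPU active, scheduled, and last-blocked counter
 * arrays (the last aligned for 64-bit atomics); p_wqiniting serializes
 * concurrent initializers.
 */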
int
workq_open(struct proc *p, __unused struct workq_open_args *uap, __unused int32_t *retval)
{
	struct workqueue * wq;
	int wq_size;
	char * ptr;
	char * nptr;
	int j;
	uint32_t i;
	uint32_t num_cpus;
	int error = 0;
	boolean_t need_wakeup = FALSE;
	struct workitem * witem;
	struct workitemlist *wl;

	if ((p->p_lflag & P_LREGISTER) == 0)
		return(EINVAL);

	workqueue_lock_spin(p);

	if (p->p_wqptr == NULL) {

		while (p->p_wqiniting == TRUE) {

			assert_wait((caddr_t)&p->p_wqiniting, THREAD_UNINT);
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);
		}
		if (p->p_wqptr != NULL)
			goto out;

		p->p_wqiniting = TRUE;

		workqueue_unlock(p);

		num_cpus = ml_get_max_cpus();

		wq_size = sizeof(struct workqueue) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint64_t)) +
			sizeof(uint64_t);

		ptr = (char *)kalloc(wq_size);
		bzero(ptr, wq_size);

		wq = (struct workqueue *)ptr;
		wq->wq_flags = WQ_LIST_INITED;
		wq->wq_proc = p;
		wq->wq_affinity_max = num_cpus;
		wq->wq_task = current_task();
		wq->wq_map = current_map();

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wl = (struct workitemlist *)&wq->wq_list[i];
			TAILQ_INIT(&wl->wl_itemlist);
			TAILQ_INIT(&wl->wl_freelist);

			for (j = 0; j < WORKITEM_SIZE; j++) {
				witem = &wq->wq_array[(i*WORKITEM_SIZE) + j];
				TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
			}
			wq->wq_reqconc[i] = wq->wq_affinity_max;
		}
		nptr = ptr + sizeof(struct workqueue);

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_thactive_count[i] = (uint32_t *)nptr;
			nptr += (num_cpus * sizeof(uint32_t));
		}
		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_thscheduled_count[i] = (uint32_t *)nptr;
			nptr += (num_cpus * sizeof(uint32_t));
		}
		/*
		 * align nptr on a 64 bit boundary so that we can do nice
		 * atomic64 operations on the timestamps...
		 * note that we requested an extra uint64_t when calculating
		 * the size for the allocation of the workqueue struct
		 */
		nptr += (sizeof(uint64_t) - 1);
		nptr = (char *)((long)nptr & ~(sizeof(uint64_t) - 1));

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_lastblocked_ts[i] = (uint64_t *)nptr;
			nptr += (num_cpus * sizeof(uint64_t));
		}
		TAILQ_INIT(&wq->wq_thrunlist);
		TAILQ_INIT(&wq->wq_thidlelist);

		wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);

		workqueue_lock_spin(p);

		p->p_wqptr = (void *)wq;
		p->p_wqsize = wq_size;

		p->p_wqiniting = FALSE;
		need_wakeup = TRUE;
	}
out:
	workqueue_unlock(p);

	if (need_wakeup == TRUE)
		wakeup(&p->p_wqiniting);
	return(error);
}

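/*
 * workq_kernreturn: user/kernel interface for the workqueue.
 * WQOPS_QUEUE_ADD queues a work item (optionally overcommit),
 * WQOPS_QUEUE_REMOVE pulls one back off, WQOPS_THREAD_RETURN recycles
 * the calling worker thread, and WQOPS_THREAD_SETCONC re-purposes the
 * affinity argument as a per-priority concurrency target.  All paths
 * funnel into workqueue_run_nextitem(), which drops the workq lock.
 */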
int
workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, __unused int32_t *retval)
{
	user_addr_t item = uap->item;
	int options	= uap->options;
	int prio	= uap->prio;	/* should be used to find the right workqueue */
	int affinity	= uap->affinity;
	int error	= 0;
	thread_t th	= THREAD_NULL;
	user_addr_t oc_item = 0;
	struct workqueue *wq;

	if ((p->p_lflag & P_LREGISTER) == 0)
		return(EINVAL);

	/*
	 * affinity not yet hooked up on this path
	 */
	affinity = -1;

	switch (options) {

	case WQOPS_QUEUE_ADD: {

		if (prio & WORKQUEUE_OVERCOMMIT) {
			prio &= ~WORKQUEUE_OVERCOMMIT;
			oc_item = item;
		}
		if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS))
			return (EINVAL);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);
			return (EINVAL);
		}
		if (wq->wq_thidlecount == 0 && (oc_item || (wq->wq_nthreads < wq->wq_affinity_max))) {

			workqueue_addnewthread(wq);

			if (wq->wq_thidlecount == 0)
				oc_item = 0;
		}
		if (oc_item == 0)
			error = workqueue_additem(wq, prio, item, affinity);

		KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, prio, affinity, oc_item, 0);
		}
		break;
	case WQOPS_QUEUE_REMOVE: {

		if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS))
			return (EINVAL);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);
			return (EINVAL);
		}
		error = workqueue_removeitem(wq, prio, item);
		}
		break;
	case WQOPS_THREAD_RETURN: {

		th = current_thread();
		struct uthread *uth = get_bsdthread_info(th);

		/* reset signal mask on the workqueue thread to default state */
		if (uth->uu_sigmask != (sigset_t)(~workq_threadmask)) {
			proc_lock(p);
			uth->uu_sigmask = ~workq_threadmask;
			proc_unlock(p);
		}

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL || (uth->uu_threadlist == NULL)) {
			workqueue_unlock(p);
			return (EINVAL);
		}
		KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, wq, 0, 0, 0, 0);
		}
		break;
	case WQOPS_THREAD_SETCONC: {

		if ((prio < 0) || (prio > WORKQUEUE_NUMPRIOS))
			return (EINVAL);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);
			return (EINVAL);
		}
		/*
		 * for this operation, we re-purpose the affinity
		 * argument as the concurrency target
		 */
		if (prio < WORKQUEUE_NUMPRIOS)
			wq->wq_reqconc[prio] = affinity;
		else {
			for (prio = 0; prio < WORKQUEUE_NUMPRIOS; prio++)
				wq->wq_reqconc[prio] = affinity;

		}
		}
		break;
	default:
		return (EINVAL);
	}
	(void)workqueue_run_nextitem(p, wq, th, oc_item, prio, affinity);
	/*
	 * workqueue_run_nextitem is responsible for
	 * dropping the workqueue lock in all cases
	 */
	return (error);

}


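/*
 * workqueue_exit: process teardown.  Marks the workqueue WQ_EXITING so no
 * new add-thread timer can be armed, waits for any timer that is already
 * pending or running, then walks the run and idle lists releasing each
 * thread's reference before freeing the workqueue itself.
 */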
void
workqueue_exit(struct proc *p)
{
	struct workqueue * wq;
	struct threadlist * tl, *tlist;
	struct uthread *uth;
	int wq_size = 0;

	if (p->p_wqptr != NULL) {

		KERNEL_DEBUG(0x900808c | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);

		workqueue_lock_spin(p);

		wq = (struct workqueue *)p->p_wqptr;

		if (wq == NULL) {
			workqueue_unlock(p);

			KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, -1, 0);
			return;
		}
		wq_size = p->p_wqsize;
		p->p_wqptr = NULL;
		p->p_wqsize = 0;

		/*
		 * we now arm the timer in the callback function w/o holding the workq lock...
		 * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
		 * ensure only a single timer is running and to notice that WQ_EXITING has
		 * been set (we don't want to start a timer once WQ_EXITING is posted)
		 *
		 * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
		 * therefore no need to clear the timer state atomically from the flags
		 *
		 * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING
		 * the check for and sleep until clear is protected
		 */
		while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));

		if (wq->wq_flags & WQ_ATIMER_RUNNING) {
			if (thread_call_cancel(wq->wq_atimer_call) == TRUE)
				wq->wq_flags &= ~WQ_ATIMER_RUNNING;
		}
		while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {

			assert_wait((caddr_t)wq, (THREAD_UNINT));
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);
		}
		workqueue_unlock(p);

		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {

			thread_sched_call(tl->th_thread, NULL);

			uth = get_bsdthread_info(tl->th_thread);
			if (uth != (struct uthread *)0) {
				uth->uu_threadlist = NULL;
			}
			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

			/*
			 * drop our last ref on the thread
			 */
			thread_deallocate(tl->th_thread);

			kfree(tl, sizeof(struct threadlist));
		}
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {

			thread_sched_call(tl->th_thread, NULL);

			uth = get_bsdthread_info(tl->th_thread);
			if (uth != (struct uthread *)0) {
				uth->uu_threadlist = NULL;
			}
			TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);

			/*
			 * drop our last ref on the thread
			 */
			thread_deallocate(tl->th_thread);

			kfree(tl, sizeof(struct threadlist));
		}
		thread_call_free(wq->wq_atimer_call);

		kfree(wq, wq_size);

		KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}
}

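/*
 * workqueue_additem / workqueue_removeitem: called with the workqueue
 * lock held.  Items live on per-priority free/item lists carved out of
 * wq_array; wq_list_bitmap tracks which priority levels currently have
 * queued work so the dispatch path can quickly find the first level
 * with something to run.
 */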
static int
workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity)
{
	struct workitem *witem;
	struct workitemlist *wl;

	wl = (struct workitemlist *)&wq->wq_list[prio];

	if (TAILQ_EMPTY(&wl->wl_freelist))
		return (ENOMEM);

	witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
	TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);

	witem->wi_item = item;
	witem->wi_affinity = affinity;
	TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);

	wq->wq_list_bitmap |= (1 << prio);

	wq->wq_itemcount++;

	return (0);
}

static int
workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
{
	struct workitem *witem;
	struct workitemlist *wl;
	int error = ESRCH;

	wl = (struct workitemlist *)&wq->wq_list[prio];

	TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
		if (witem->wi_item == item) {
			TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);

			if (TAILQ_EMPTY(&wl->wl_itemlist))
				wq->wq_list_bitmap &= ~(1 << prio);
			wq->wq_itemcount--;

			witem->wi_item = (user_addr_t)0;
			witem->wi_affinity = 0;
			TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

			error = 0;
			break;
		}
	}
	return (error);
}



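/*
 * Per-priority scheduling parameters applied when a worker thread is
 * retargeted in workqueue_run_nextitem(): an importance offset and a
 * timeshare policy value for each of the WORKQUEUE_NUMPRIOS levels.
 */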
1463static int workqueue_importance[WORKQUEUE_NUMPRIOS] =
1464{
1465 2, 0, -2,
1466};
1467
1468static int workqueue_policy[WORKQUEUE_NUMPRIOS] =
1469{
1470 1, 1, 1,
1471};
1472
1473
2d21ac55
A
1474/*
1475 * workqueue_run_nextitem:
1476 * called with the workqueue lock held...
1477 * responsible for dropping it in all cases
1478 */
b0d623f7
A
1479static boolean_t
1480workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_addr_t oc_item, int oc_prio, int oc_affinity)
2d21ac55 1481{
2d21ac55
A
1482 struct workitem *witem = NULL;
1483 user_addr_t item = 0;
1484 thread_t th_to_run = THREAD_NULL;
1485 thread_t th_to_park = THREAD_NULL;
1486 int wake_thread = 0;
1487 int reuse_thread = 1;
b0d623f7
A
1488 uint32_t priority, orig_priority;
1489 uint32_t affinity_tag, orig_affinity_tag;
1490 uint32_t i, n;
1491 uint32_t activecount;
1492 uint32_t busycount;
1493 uint32_t us_to_wait;
2d21ac55 1494 struct threadlist *tl = NULL;
b0d623f7 1495 struct threadlist *ttl = NULL;
2d21ac55 1496 struct uthread *uth = NULL;
b0d623f7 1497 struct workitemlist *wl = NULL;
2d21ac55 1498 boolean_t start_timer = FALSE;
b0d623f7
A
1499 boolean_t adjust_counters = TRUE;
1500 uint64_t curtime;
2d21ac55 1501
2d21ac55 1502
b0d623f7
A
1503 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_itemcount, 0);
1504
1505 /*
1506 * from here until we drop the workq lock
1507 * we can't be pre-empted since we hold
1508 * the lock in spin mode... this is important
1509 * since we have to independently update the priority
1510 * and affinity that the thread is associated with
1511 * and these values are used to index the multi-dimensional
1512 * counter arrays in 'workqueue_callback'
1513 */
1514 if (oc_item) {
1515 uint32_t min_scheduled = 0;
1516 uint32_t scheduled_count;
1517 uint32_t active_count;
1518 uint32_t t_affinity = 0;
1519
1520 priority = oc_prio;
1521 item = oc_item;
1522
1523 if ((affinity_tag = oc_affinity) == (uint32_t)-1) {
1524 for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
1525 /*
1526 * look for the affinity group with the least number of threads
1527 */
1528 scheduled_count = 0;
1529 active_count = 0;
2d21ac55 1530
b0d623f7
A
1531 for (i = 0; i <= priority; i++) {
1532 scheduled_count += wq->wq_thscheduled_count[i][affinity_tag];
1533 active_count += wq->wq_thactive_count[i][affinity_tag];
1534 }
1535 if (active_count == 0) {
1536 t_affinity = affinity_tag;
1537 break;
1538 }
1539 if (affinity_tag == 0 || scheduled_count < min_scheduled) {
1540 min_scheduled = scheduled_count;
1541 t_affinity = affinity_tag;
1542 }
1543 }
1544 affinity_tag = t_affinity;
1545 }
1546 goto grab_idle_thread;
1547 }
2d21ac55
A
1548 if (wq->wq_itemcount == 0) {
1549 if ((th_to_park = thread) == THREAD_NULL)
b0d623f7 1550 goto out_of_work;
2d21ac55
A
1551 goto parkit;
1552 }
b0d623f7
A
1553 for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
1554 if (wq->wq_list_bitmap & (1 << priority)) {
1555 wl = (struct workitemlist *)&wq->wq_list[priority];
1556 break;
1557 }
1558 }
1559 assert(wl != NULL);
1560 assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));
1561
1562 curtime = mach_absolute_time();
1563
2d21ac55 1564 if (thread != THREAD_NULL) {
2d21ac55
A
1565 uth = get_bsdthread_info(thread);
1566 tl = uth->uu_threadlist;
b0d623f7 1567 affinity_tag = tl->th_affinity_tag;
2d21ac55 1568
b0d623f7
A
1569 /*
1570 * check to see if the affinity group this thread is
1571 * associated with is still within the bounds of the
1572 * specified concurrency for the priority level
1573 * we're considering running work for
1574 */
1575 if (affinity_tag < wq->wq_reqconc[priority]) {
1576 /*
1577 * we're a worker thread from the pool... currently we
1578 * are considered 'active' which means we're counted
1579 * in "wq_thactive_count"
1580 * add up the active counts of all the priority levels
1581 * up to and including the one we want to schedule
2d21ac55 1582 */
b0d623f7
A
1583 for (activecount = 0, i = 0; i <= priority; i++) {
1584 uint32_t acount;
1585
1586 acount = wq->wq_thactive_count[i][affinity_tag];
1587
1588 if (acount == 0 && wq->wq_thscheduled_count[i][affinity_tag]) {
1589 if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag]))
1590 acount = 1;
1591 }
1592 activecount += acount;
1593 }
1594 if (activecount == 1) {
1595 /*
1596 * we're the only active thread associated with our
1597 * affinity group at this priority level and higher,
1598 * so pick up some work and keep going
1599 */
1600 th_to_run = thread;
1601 goto pick_up_work;
1602 }
2d21ac55 1603 }
b0d623f7
A
1604 /*
1605 * there's more than 1 thread running in this affinity group
1606 * or the concurrency level has been cut back for this priority...
1607 * lets continue on and look for an 'empty' group to run this
1608 * work item in
1609 */
2d21ac55 1610 }
b0d623f7
A
1611 busycount = 0;
1612
1613 for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
1614 /*
2d21ac55 1615 * look for first affinity group that is currently not active
b0d623f7
A
1616 * i.e. no active threads at this priority level or higher
1617 * and no threads that have run recently
2d21ac55 1618 */
b0d623f7
A
1619 for (activecount = 0, i = 0; i <= priority; i++) {
1620 if ((activecount = wq->wq_thactive_count[i][affinity_tag]))
1621 break;
1622
1623 if (wq->wq_thscheduled_count[i][affinity_tag]) {
1624 if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {
1625 busycount++;
1626 break;
1627 }
1628 }
2d21ac55 1629 }
b0d623f7
A
1630 if (activecount == 0 && busycount == 0)
1631 break;
2d21ac55 1632 }
b0d623f7
A
1633 if (affinity_tag >= wq->wq_reqconc[priority]) {
1634 /*
1635 * we've already got at least 1 thread per
1636 * affinity group in the active state...
2d21ac55 1637 */
b0d623f7
A
1638 if (busycount) {
1639 /*
1640 * we found at least 1 thread in the
1641 * 'busy' state... make sure we start
1642 * the timer because if they are the only
1643 * threads keeping us from scheduling
1644 * this workitem, we won't get a callback
1645 * to kick off the timer... we need to
1646 * start it now...
2d21ac55 1647 */
b0d623f7 1648 WQ_TIMER_NEEDED(wq, start_timer);
2d21ac55 1649 }
b0d623f7 1650 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_NONE, wq, busycount, start_timer, 0, 0);
2d21ac55 1651
b0d623f7
A
1652 if (thread != THREAD_NULL) {
1653 /*
1654 * go park this one for later
2d21ac55 1655 */
b0d623f7 1656 th_to_park = thread;
2d21ac55
A
1657 goto parkit;
1658 }
b0d623f7
A
1659 goto out_of_work;
1660 }
1661 if (thread != THREAD_NULL) {
1662 /*
1663 * we're overbooked on the affinity group this thread is
1664 * currently associated with, but we have work to do
1665 * and at least 1 idle processor, so we'll just retarget
1666 * this thread to a new affinity group
1667 */
1668 th_to_run = thread;
1669 goto pick_up_work;
1670 }
1671 if (wq->wq_thidlecount == 0) {
2d21ac55 1672 /*
b0d623f7
A
1673 * we don't have a thread to schedule, but we have
1674 * work to do and at least 1 affinity group that
1675 * doesn't currently have an active thread...
2d21ac55 1676 */
b0d623f7
A
1677 WQ_TIMER_NEEDED(wq, start_timer);
1678
1679 KERNEL_DEBUG(0xefffd118, wq, wq->wq_nthreads, start_timer, 0, 0);
1680
1681 goto no_thread_to_run;
1682 }
1683
1684grab_idle_thread:
1685 /*
1686 * we've got a candidate (affinity group with no currently
1687 * active threads) to start a new thread on...
1688 * we already know there is both work available
1689 * and an idle thread, so activate a thread and then
1690 * fall into the code that pulls a new workitem...
1691 */
1692 TAILQ_FOREACH(ttl, &wq->wq_thidlelist, th_entry) {
1693 if (ttl->th_affinity_tag == affinity_tag || ttl->th_affinity_tag == (uint16_t)-1) {
1694
1695 TAILQ_REMOVE(&wq->wq_thidlelist, ttl, th_entry);
1696 tl = ttl;
1697
1698 break;
1699 }
2d21ac55 1700 }
b0d623f7
A
1701 if (tl == NULL) {
1702 tl = TAILQ_FIRST(&wq->wq_thidlelist);
1703 TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
1704 }
1705 wq->wq_thidlecount--;
2d21ac55 1706
2d21ac55
A
1707 TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
1708
1709 if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
b0d623f7 1710 tl->th_flags &= ~TH_LIST_SUSPENDED;
2d21ac55 1711 reuse_thread = 0;
b0d623f7
A
1712
1713 thread_sched_call(tl->th_thread, workqueue_callback);
1714
2d21ac55 1715 } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
b0d623f7
A
1716 tl->th_flags &= ~TH_LIST_BLOCKED;
1717 tl->th_flags |= TH_LIST_BUSY;
2d21ac55
A
1718 wake_thread = 1;
1719 }
1720 tl->th_flags |= TH_LIST_RUNNING;
1721
b0d623f7
A
1722 wq->wq_threads_scheduled++;
1723 wq->wq_thscheduled_count[priority][affinity_tag]++;
1724 OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]);
2d21ac55 1725
b0d623f7
A
1726 adjust_counters = FALSE;
1727 th_to_run = tl->th_thread;
2d21ac55
A
1728
1729pick_up_work:
b0d623f7
A
1730 if (item == 0) {
1731 witem = TAILQ_FIRST(&wl->wl_itemlist);
1732 TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
1733
1734 if (TAILQ_EMPTY(&wl->wl_itemlist))
1735 wq->wq_list_bitmap &= ~(1 << priority);
1736 wq->wq_itemcount--;
1737
1738 item = witem->wi_item;
1739 witem->wi_item = (user_addr_t)0;
1740 witem->wi_affinity = 0;
1741 TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
1742 }
1743 orig_priority = tl->th_priority;
1744 orig_affinity_tag = tl->th_affinity_tag;
2d21ac55 1745
b0d623f7
A
1746 tl->th_priority = priority;
1747 tl->th_affinity_tag = affinity_tag;
2d21ac55 1748
b0d623f7
A
1749 if (adjust_counters == TRUE && (orig_priority != priority || orig_affinity_tag != affinity_tag)) {
1750 /*
1751 * we need to adjust these counters based on this
1752 * thread's new disposition with respect to affinity and priority
1753 */
1754 OSAddAtomic(-1, &wq->wq_thactive_count[orig_priority][orig_affinity_tag]);
1755 OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]);
2d21ac55 1756
b0d623f7
A
1757 wq->wq_thscheduled_count[orig_priority][orig_affinity_tag]--;
1758 wq->wq_thscheduled_count[priority][affinity_tag]++;
2d21ac55 1759 }
b0d623f7 1760 wq->wq_thread_yielded_count = 0;
2d21ac55 1761
b0d623f7
A
1762 workqueue_unlock(p);
1763
1764 if (orig_affinity_tag != affinity_tag) {
2d21ac55 1765 /*
b0d623f7
A
1766 * this thread's affinity does not match the affinity group
1767 * it's being placed on (it's either a brand new thread or
1768 * we're retargeting an existing thread to a new group)...
1769 * an affinity tag of 0 means no affinity...
1770 * but we want our tags to be 0-based because they
1771 * are used to index arrays, so...
1772 * keep them 0-based internally and bump by 1 when
1773 * calling out to set the affinity
2d21ac55 1774 */
b0d623f7 1775 KERNEL_DEBUG(0xefffd114 | DBG_FUNC_START, wq, orig_affinity_tag, 0, 0, 0);
2d21ac55 1776
b0d623f7 1777 (void)thread_affinity_set(th_to_run, affinity_tag + 1);
2d21ac55 1778
b0d623f7 1779 KERNEL_DEBUG(0xefffd114 | DBG_FUNC_END, wq, affinity_tag, 0, 0, 0);
2d21ac55 1780 }
b0d623f7
A
1781 if (orig_priority != priority) {
1782 thread_precedence_policy_data_t precedinfo;
1783 thread_extended_policy_data_t extinfo;
1784 uint32_t policy;
2d21ac55 1785
b0d623f7
A
1786 policy = workqueue_policy[priority];
1787
1788 KERNEL_DEBUG(0xefffd120 | DBG_FUNC_START, wq, orig_priority, tl->th_policy, 0, 0);
1789
1790 if (tl->th_policy != policy) {
2d21ac55 1791
b0d623f7
A
1792 extinfo.timeshare = policy;
1793 (void)thread_policy_set_internal(th_to_run, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
1794
1795 tl->th_policy = policy;
1796 }
1797 precedinfo.importance = workqueue_importance[priority];
1798 (void)thread_policy_set_internal(th_to_run, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
2d21ac55 1799
b0d623f7
A
1800 KERNEL_DEBUG(0xefffd120 | DBG_FUNC_END, wq, priority, policy, 0, 0);
1801 }
1802 if (kdebug_enable) {
1803 int lpri = -1;
1804 int laffinity = -1;
1805 int first = -1;
1806 uint32_t code = 0xefffd02c | DBG_FUNC_START;
1807
1808 for (n = 0; n < WORKQUEUE_NUMPRIOS; n++) {
1809 for (i = 0; i < wq->wq_affinity_max; i++) {
1810 if (wq->wq_thactive_count[n][i]) {
1811 if (lpri != -1) {
1812 KERNEL_DEBUG(code, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0);
1813 code = 0xefffd02c;
1814 first = 0;
1815 }
1816 lpri = n;
1817 laffinity = i;
1818 }
1819 }
1820 }
1821 if (lpri != -1) {
1822 if (first == -1)
1823 first = 0xeeeeeeee;
1824 KERNEL_DEBUG(0xefffd02c | DBG_FUNC_END, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0);
1825 }
1826 }
2d21ac55
A
1827 /*
1828 * if the current thread is reused for the workitem, we do not return via unix_syscall
1829 */
1830 wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
1831
b0d623f7 1832 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), item, 1, 0);
2d21ac55 1833
b0d623f7 1834 return (TRUE);
2d21ac55 1835
b0d623f7
A
1836out_of_work:
1837 /*
1838 * we have no work to do or we are fully booked
1839 * with respect to running threads...
1840 */
1841no_thread_to_run:
1842 workqueue_unlock(p);
1843
1844 if (start_timer)
1845 workqueue_interval_timer_start(wq);
1846
1847 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 2, 0);
1848
1849 return (FALSE);
2d21ac55
A
1850
1851parkit:
2d21ac55
A
1852 /*
1853 * this is a workqueue thread with no more
1854 * work to do... park it for now
1855 */
1856 uth = get_bsdthread_info(th_to_park);
1857 tl = uth->uu_threadlist;
1858 if (tl == NULL)
1859 panic("wq thread with no threadlist");
1860
1861 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1862 tl->th_flags &= ~TH_LIST_RUNNING;
1863
1864 tl->th_flags |= TH_LIST_BLOCKED;
b0d623f7
A
1865 TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
1866
1867 thread_sched_call(th_to_park, NULL);
1868
1869 OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
1870 wq->wq_thscheduled_count[tl->th_priority][tl->th_affinity_tag]--;
1871 wq->wq_threads_scheduled--;
2d21ac55 1872
b0d623f7
A
1873 if (wq->wq_thidlecount < 100)
1874 us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
1875 else
1876 us_to_wait = wq_reduce_pool_window_usecs / 100;
1877
1878 wq->wq_thidlecount++;
1879
1880 assert_wait_timeout((caddr_t)tl, (THREAD_INTERRUPTIBLE), us_to_wait, NSEC_PER_USEC);
2d21ac55
A
1881
1882 workqueue_unlock(p);
1883
1884 if (start_timer)
b0d623f7
A
1885 workqueue_interval_timer_start(wq);
1886
1887 KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park));
1888 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0);
2d21ac55 1889
b0d623f7
A
1890 thread_block((thread_continue_t)wq_unpark_continue);
1891 /* NOT REACHED */
2d21ac55 1892
b0d623f7
A
1893 return (FALSE);
1894}
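
/*
 * Illustrative sketch (not part of the original source): a standalone
 * model of the park-timeout scaling used in the parkit path above.
 * Each idle thread below 100 shortens the wait by 1% of the
 * reduce-pool window, so a nearly empty pool parks for close to the
 * full window while a well-stocked pool re-checks quickly.  The
 * window value used here (5,000,000 us) is an assumed example, not
 * necessarily the kernel's configured wq_reduce_pool_window_usecs.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
park_timeout_usecs(uint32_t idlecount, uint32_t window_usecs)
{
	if (idlecount < 100)
		return (window_usecs - (idlecount * (window_usecs / 100)));
	return (window_usecs / 100);
}

int
main(void)
{
	uint32_t window = 5000000;	/* assumed example window */
	uint32_t counts[] = { 0, 1, 50, 99, 100, 200 };

	for (unsigned i = 0; i < sizeof(counts) / sizeof(counts[0]); i++)
		printf("idlecount %3u -> wait %7u us\n",
		    counts[i], park_timeout_usecs(counts[i], window));
	return (0);
}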
2d21ac55 1895
2d21ac55 1896
b0d623f7
A
1897static void
1898wq_unpark_continue(void)
1899{
1900 struct uthread *uth = NULL;
1901 struct threadlist *tl;
1902 thread_t th_to_unpark;
1903 proc_t p;
1904
1905 th_to_unpark = current_thread();
1906 uth = get_bsdthread_info(th_to_unpark);
1907
1908 if (uth != NULL) {
1909 if ((tl = uth->uu_threadlist) != NULL) {
1910
1911 if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
1912 /*
1913 * a normal wakeup of this thread occurred... no need
1914 * for any synchronization with the timer and wq_runitem
1915 */
1916normal_return_to_user:
1917 thread_sched_call(th_to_unpark, workqueue_callback);
2d21ac55 1918
b0d623f7
A
1919 KERNEL_DEBUG(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0);
1920
1921 thread_exception_return();
1922 }
1923 p = current_proc();
1924
1925 workqueue_lock_spin(p);
1926
1927 if ( !(tl->th_flags & TH_LIST_RUNNING)) {
1928 /*
1929 * the timer popped us out and we have not
1930 * been moved off the idle list,
1931 * so we should now self-destruct
1932 *
1933 * workqueue_removethread consumes the lock
1934 */
1935 workqueue_removethread(tl);
1936
1937 thread_exception_return();
1938 }
1939 /*
1940 * the timer woke us up, but we have already
1941 * started to make this thread runnable
1942 * and have not yet finished that process...
1943 * so wait for the normal wakeup
1944 */
1945 while ((tl->th_flags & TH_LIST_BUSY)) {
1946
1947 assert_wait((caddr_t)tl, (THREAD_UNINT));
1948
1949 workqueue_unlock(p);
2d21ac55 1950
b0d623f7
A
1951 thread_block(THREAD_CONTINUE_NULL);
1952
1953 workqueue_lock_spin(p);
1954 }
1955 /*
1956 * we have finished setting up the thread's context;
1957 * now we can return as if we got a normal wakeup
1958 */
1959 workqueue_unlock(p);
2d21ac55 1960
b0d623f7
A
1961 goto normal_return_to_user;
1962 }
1963 }
1964 thread_exception_return();
2d21ac55
A
1965}
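
/*
 * Illustrative userland model (not part of the original source) of the
 * TH_LIST_BUSY handshake between the scheduling path and
 * wq_unpark_continue() above: the scheduler marks a parked thread
 * RUNNING|BUSY, finishes setting up its context, then clears BUSY and
 * wakes it; if the park timeout fires first, the thread must wait for
 * BUSY to clear before running.  Flag names mirror the kernel's, but
 * plain pthread primitives stand in for assert_wait()/wakeup().
 */
#include <pthread.h>
#include <stdio.h>

#define TH_RUNNING	0x01
#define TH_BUSY		0x02

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv   = PTHREAD_COND_INITIALIZER;
static int th_flags;

static void *
parked_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	/* analogous to the while (tl->th_flags & TH_LIST_BUSY) loop */
	while (!(th_flags & TH_RUNNING) || (th_flags & TH_BUSY))
		pthread_cond_wait(&cv, &lock);
	pthread_mutex_unlock(&lock);
	printf("context set up, running the work item\n");
	return (NULL);
}

int
main(void)
{
	pthread_t th;

	pthread_create(&th, NULL, parked_thread, NULL);

	/* scheduler analogue: mark the thread RUNNING|BUSY first */
	pthread_mutex_lock(&lock);
	th_flags |= TH_RUNNING | TH_BUSY;
	pthread_mutex_unlock(&lock);

	/* ... context setup happens here, outside the lock ... */

	/* wq_runitem() analogue: clear BUSY and wake the thread */
	pthread_mutex_lock(&lock);
	th_flags &= ~TH_BUSY;
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&lock);

	pthread_join(th, NULL);
	return (0);
}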
1966
b0d623f7
A
1967
1968
2d21ac55
A
1969static void
1970wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
1971 int reuse_thread, int wake_thread, int return_directly)
1972{
1973 int ret = 0;
1974
b0d623f7 1975 KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th));
2d21ac55
A
1976
1977 ret = setup_wqthread(p, th, item, reuse_thread, tl);
1978
1979 if (ret != 0)
1980 panic("setup_wqthread failed %x\n", ret);
1981
1982 if (return_directly) {
b0d623f7
A
1983 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);
1984
2d21ac55
A
1985 thread_exception_return();
1986
1987 panic("wq_runitem: thread_exception_return returned ...\n");
1988 }
1989 if (wake_thread) {
b0d623f7
A
1990 workqueue_lock_spin(p);
1991
1992 tl->th_flags &= ~TH_LIST_BUSY;
2d21ac55 1993 wakeup(tl);
b0d623f7
A
1994
1995 workqueue_unlock(p);
2d21ac55 1996 } else {
b0d623f7 1997 KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
2d21ac55
A
1998
1999 thread_resume(th);
2000 }
2001}
2002
2003
2004int
2005setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
2006{
2d21ac55
A
2007#if defined(__ppc__)
2008 /*
2009 * Set up PowerPC registers...
2010 * internally they are always kept as 64-bit and,
2011 * since the register set is the same between 32- and 64-bit modes,
2012 * we don't need 2 different methods for setting the state
2013 */
2014 {
2015 ppc_thread_state64_t state64;
2016 ppc_thread_state64_t *ts64 = &state64;
2017
2018 ts64->srr0 = (uint64_t)p->p_wqthread;
2019 ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE);
2020 ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
b0d623f7 2021 ts64->r4 = (uint64_t)(tl->th_thport);
2d21ac55
A
2022 ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2023 ts64->r6 = (uint64_t)item;
2024 ts64->r7 = (uint64_t)reuse_thread;
2025 ts64->r8 = (uint64_t)0;
2026
b0d623f7
A
2027 if ((reuse_thread != 0) && (ts64->r3 == (uint64_t)0))
2028 panic("setup_wqthread: setting reuse thread with null pthread\n");
2d21ac55
A
2029 thread_set_wq_state64(th, (thread_state_t)ts64);
2030 }
b0d623f7 2031#elif defined(__i386__) || defined(__x86_64__)
2d21ac55
A
2032 int isLP64 = 0;
2033
2034 isLP64 = IS_64BIT_PROCESS(p);
2035 /*
2036 * Set up i386 registers & function call.
2037 */
2038 if (isLP64 == 0) {
2039 x86_thread_state32_t state;
2040 x86_thread_state32_t *ts = &state;
2041
2042 ts->eip = (int)p->p_wqthread;
2043 ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
2044 ts->ebx = (unsigned int)tl->th_thport;
2045 ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2046 ts->edx = (unsigned int)item;
2047 ts->edi = (unsigned int)reuse_thread;
2048 ts->esi = (unsigned int)0;
2049 /*
2050 * set stack pointer
2051 */
2052 ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));
2053
b0d623f7
A
2054 if ((reuse_thread != 0) && (ts->eax == (unsigned int)0))
2055 panic("setup_wqthread: setting reuse thread with null pthread\n");
2d21ac55
A
2056 thread_set_wq_state32(th, (thread_state_t)ts);
2057
2058 } else {
2059 x86_thread_state64_t state64;
2060 x86_thread_state64_t *ts64 = &state64;
2061
2062 ts64->rip = (uint64_t)p->p_wqthread;
2063 ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
b0d623f7 2064 ts64->rsi = (uint64_t)(tl->th_thport);
2d21ac55
A
2065 ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2066 ts64->rcx = (uint64_t)item;
2067 ts64->r8 = (uint64_t)reuse_thread;
2068 ts64->r9 = (uint64_t)0;
2069
2070 /*
2071 * set stack pointer aligned to 16 byte boundary
2072 */
2073 ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);
2074
b0d623f7
A
2075 if ((reuse_thread != 0) && (ts64->rdi == (uint64_t)0))
2076 panic("setup_wqthread: setting reuse thread with null pthread\n");
2d21ac55
A
2077 thread_set_wq_state64(th, (thread_state_t)ts64);
2078 }
2d21ac55
A
2079#else
2080#error setup_wqthread not defined for this architecture
2081#endif
2082 return(0);
2083}
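
/*
 * Illustrative sketch (not part of the original source): on x86_64 the
 * register assignments above (%rdi, %rsi, %rdx, %rcx, %r8) line up with
 * the first five integer argument registers of the calling convention,
 * so the user-space entry point installed at p->p_wqthread can be
 * modeled as a function with the shape below.  The exact libpthread
 * signature is an assumption here, inferred only from the register
 * layout; example_wqthread is a hypothetical stand-in.
 */
#include <mach/mach.h>

typedef void (*wqthread_entry_t)(
    void *self,			/* %rdi: pthread structure at the top of the stack allocation */
    mach_port_t kport,		/* %rsi: kernel thread port (th_thport) */
    void *stackaddr,		/* %rdx: usable stack just above the guard area */
    void *item,			/* %rcx: work item address */
    int reuse);			/* %r8:  non-zero when an existing thread is being reused */

static void
example_wqthread(void *self, mach_port_t kport, void *stackaddr,
    void *item, int reuse)
{
	(void)self; (void)kport; (void)stackaddr; (void)item; (void)reuse;
	/* a real entry would run the work item and, when reuse is set,
	 * request more work from the kernel instead of exiting */
}

static wqthread_entry_t wqthread_entry = example_wqthread;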
2084
b0d623f7
A
2085int
2086fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
2087{
2088 struct workqueue * wq;
2089 int error = 0;
2090 int activecount;
2091 uint32_t pri, affinity;
2092
2093 workqueue_lock_spin(p);
2094 if ((wq = p->p_wqptr) == NULL) {
2095 error = EINVAL;
2096 goto out;
2097 }
2098 activecount = 0;
2099
2100 for (pri = 0; pri < WORKQUEUE_NUMPRIOS; pri++) {
2101 for (affinity = 0; affinity < wq->wq_affinity_max; affinity++)
2102 activecount += wq->wq_thactive_count[pri][affinity];
2103 }
2104 pwqinfo->pwq_nthreads = wq->wq_nthreads;
2105 pwqinfo->pwq_runthreads = activecount;
2106 pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
2107out:
2108 workqueue_unlock(p);
2109 return(error);
2110}
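
/*
 * Illustrative userland sketch (not part of the original source): the
 * structure filled in by fill_procworkqueue() is exposed through
 * proc_pidinfo() with the PROC_PIDWORKQUEUEINFO flavor, so a
 * monitoring tool could read it roughly like this (error handling
 * kept minimal).
 */
#include <libproc.h>
#include <stdio.h>
#include <sys/proc_info.h>
#include <unistd.h>

int
main(void)
{
	struct proc_workqueueinfo pwqinfo;
	int ret;

	ret = proc_pidinfo(getpid(), PROC_PIDWORKQUEUEINFO, 0,
	    &pwqinfo, sizeof(pwqinfo));

	if (ret != (int)sizeof(pwqinfo)) {
		fprintf(stderr, "proc_pidinfo failed\n");
		return (1);
	}
	printf("threads %u running %u blocked %u\n",
	    pwqinfo.pwq_nthreads, pwqinfo.pwq_runthreads,
	    pwqinfo.pwq_blockedthreads);
	return (0);
}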
2111
2112/* Set the target concurrency of one of the priority queues (0, 1, 2) to the specified value */
2113int
2114proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc)
2115{
2116 proc_t p, self;
2117 uint64_t addr;
2118 int32_t conc = targetconc;
2119 int error = 0;
2120 vm_map_t oldmap = VM_MAP_NULL;
2121 int gotref = 0;
2122
2123 self = current_proc();
2124 if (self->p_pid != pid) {
2125 /* if not on self, hold a reference on the process */
2126
2127 if (pid == 0)
2128 return(EINVAL);
2129
2130 p = proc_find(pid);
2131
2132 if (p == PROC_NULL)
2133 return(ESRCH);
2134 gotref = 1;
2135
2136 } else
2137 p = self;
2138
2139 if ((addr = p->p_targconc) == (uint64_t)0) {
2140 error = EINVAL;
2141 goto out;
2142 }
2143
2144
2145 if ((queuenum >= WQ_MAXPRI_MIN) && (queuenum <= WQ_MAXPRI_MAX)) {
2146 addr += (queuenum * sizeof(int32_t));
2147 if (gotref == 1)
2148 oldmap = vm_map_switch(get_task_map(p->task));
2149 error = copyout(&conc, addr, sizeof(int32_t));
2150 if (gotref == 1)
2151 (void)vm_map_switch(oldmap);
2152
2153 } else {
2154 error = EINVAL;
2155 }
2156out:
2157 if (gotref == 1)
2158 proc_rele(p);
2159 return(error);
2160}
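
/*
 * Illustrative sketch (not part of the original source): p_targconc
 * points at a small per-process array of int32_t values in user space,
 * one slot per priority queue, and the copyout above writes either one
 * indexed slot or the whole array.  The queue count of 3 used here is
 * an assumption chosen to match the (0,1,2) range mentioned above.
 */
#include <stdint.h>
#include <string.h>

#define EXAMPLE_NUMQUEUES 3

static int32_t targconc[EXAMPLE_NUMQUEUES];	/* stands in for the user buffer */

/* model of the proc_settargetconc() path: write one indexed slot */
static void
set_one(int queuenum, int32_t conc)
{
	targconc[queuenum] = conc;
}

/* model of the proc_setalltargetconc() path: write every slot at once */
static void
set_all(const int32_t *conc)
{
	memcpy(targconc, conc, sizeof(targconc));
}

int
main(void)
{
	int32_t all[EXAMPLE_NUMQUEUES] = { 4, 2, 1 };

	set_all(all);
	set_one(0, 8);
	return (0);
}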
2161
2162
2163/* Set the target concurrency of all the priority queues to the specified values */
2164int
2165proc_setalltargetconc(pid_t pid, int32_t * targetconcp)
2166{
2167 proc_t p, self;
2168 uint64_t addr;
2169 int error = 0;
2170 vm_map_t oldmap = VM_MAP_NULL;
2171 int gotref = 0;
2172
2173 self = current_proc();
2174 if (self->p_pid != pid) {
2175 /* if not on self, hold a reference on the process */
2176
2177 if (pid == 0)
2178 return(EINVAL);
2179
2180 p = proc_find(pid);
2181
2182 if (p == PROC_NULL)
2183 return(ESRCH);
2184 gotref = 1;
2185
2186 } else
2187 p = self;
2188
2189 if ((addr = (uint64_t)p->p_targconc) == (uint64_t)0) {
2190 error = EINVAL;
2191 goto out;
2192 }
2193
2194
2195 if (gotref == 1)
2196 oldmap = vm_map_switch(get_task_map(p->task));
2197
2198 error = copyout(targetconcp, addr, WQ_PRI_NUM * sizeof(int32_t));
2199 if (gotref == 1)
2200 (void)vm_map_switch(oldmap);
2201
2202out:
2203 if (gotref == 1)
2204 proc_rele(p);
2205 return(error);
2206}
2207
2208int thread_selfid(__unused struct proc *p, __unused struct thread_selfid_args *uap, user_addr_t *retval)
2209{
2210 thread_t thread = current_thread();
2211 uint64_t thread_id = thread_tid(thread);
2212 *retval = thread_id;
2213 return KERN_SUCCESS;
2214}
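
/*
 * Illustrative userland sketch (not part of the original source): the
 * 64-bit identifier returned by the thread_selfid syscall above should
 * correspond to the value user code obtains through
 * pthread_threadid_np().
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t tid = 0;

	if (pthread_threadid_np(NULL, &tid) != 0) {
		fprintf(stderr, "pthread_threadid_np failed\n");
		return (1);
	}
	printf("thread id %llu\n", (unsigned long long)tid);
	return (0);
}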
2215
2216void
2217pthread_init(void)
2218{
2219 pthread_lck_grp_attr = lck_grp_attr_alloc_init();
2220 pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
2221
2222 /*
2223 * allocate the lock attribute for pthread synchronizers
2224 */
2225 pthread_lck_attr = lck_attr_alloc_init();
2226
2227 workqueue_init_lock((proc_t) get_bsdtask_info(kernel_task));
2228#if PSYNCH
2229 pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
2230
2231 pth_global_hashinit();
2232 psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
2233#endif /* PSYNCH */
2234}