1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * pthread_synch.c
31 */
32
33 #define _PTHREAD_CONDATTR_T
34 #define _PTHREAD_COND_T
35 #define _PTHREAD_MUTEXATTR_T
36 #define _PTHREAD_MUTEX_T
37 #define _PTHREAD_RWLOCKATTR_T
38 #define _PTHREAD_RWLOCK_T
39
40 #undef pthread_mutexattr_t
41 #undef pthread_mutex_t
42 #undef pthread_condattr_t
43 #undef pthread_cond_t
44 #undef pthread_rwlockattr_t
45 #undef pthread_rwlock_t
46
47 #include <sys/param.h>
48 #include <sys/queue.h>
49 #include <sys/resourcevar.h>
50 #include <sys/proc_internal.h>
51 #include <sys/kauth.h>
52 #include <sys/systm.h>
53 #include <sys/timeb.h>
54 #include <sys/times.h>
55 #include <sys/acct.h>
56 #include <sys/kernel.h>
57 #include <sys/wait.h>
58 #include <sys/signalvar.h>
59 #include <sys/syslog.h>
60 #include <sys/stat.h>
61 #include <sys/lock.h>
62 #include <sys/kdebug.h>
63 #include <sys/sysproto.h>
64 #include <sys/pthread_internal.h>
65 #include <sys/vm.h>
66 #include <sys/user.h> /* for coredump */
67
68
69 #include <mach/mach_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/semaphore.h>
72 #include <mach/sync_policy.h>
73 #include <mach/task.h>
74 #include <kern/kern_types.h>
75 #include <kern/task.h>
76 #include <kern/clock.h>
77 #include <mach/kern_return.h>
78 #include <kern/thread.h>
79 #include <kern/sched_prim.h>
80 #include <kern/kalloc.h>
81 #include <kern/sched_prim.h> /* for thread_exception_return */
82 #include <kern/processor.h>
83 #include <kern/affinity.h>
84 #include <mach/mach_vm.h>
85 #include <mach/mach_param.h>
86 #include <mach/thread_status.h>
87 #include <mach/thread_policy.h>
88 #include <mach/message.h>
89 #include <mach/port.h>
90 #include <vm/vm_protos.h>
91 #include <vm/vm_map.h> /* for current_map() */
92 #include <mach/thread_act.h> /* for thread_resume */
93 #include <machine/machine_routines.h>
94 #if defined(__i386__)
95 #include <i386/machine_routines.h>
96 #include <i386/eflags.h>
97 #include <i386/psl.h>
98 #include <i386/seg.h>
99 #endif
100
101 #include <libkern/OSAtomic.h>
102
103 #if 0
104 #undef KERNEL_DEBUG
105 #define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
106 #undef KERNEL_DEBUG1
107 #define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1
108 #endif
109
110
111 #if defined(__ppc__) || defined(__ppc64__)
112 #include <architecture/ppc/cframe.h>
113 #endif
114
115
116 lck_grp_attr_t *pthread_lck_grp_attr;
117 lck_grp_t *pthread_lck_grp;
118 lck_attr_t *pthread_lck_attr;
119 lck_mtx_t * pthread_list_mlock;
120 extern void pthread_init(void);
121
122 extern kern_return_t thread_getstatus(register thread_t act, int flavor,
123 thread_state_t tstate, mach_msg_type_number_t *count);
124 extern kern_return_t thread_setstatus(thread_t thread, int flavor,
125 thread_state_t tstate, mach_msg_type_number_t count);
126 extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
127 extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
128 extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
129
130 static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item);
131 static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item);
132 static void workqueue_run_nextitem(proc_t p, thread_t th);
133 static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
134 int reuse_thread, int wake_thread, int return_directly);
135 static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
136 static int workqueue_addnewthread(struct workqueue *wq);
137 static void workqueue_removethread(struct workqueue *wq);
138 static void workqueue_lock(proc_t);
139 static void workqueue_lock_spin(proc_t);
140 static void workqueue_unlock(proc_t);
141
142 #define C_32_STK_ALIGN 16
143 #define C_64_STK_ALIGN 16
144 #define C_64_REDZONE_LEN 128
145 #define TRUNC_DOWN32(a,c) ((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
146 #define TRUNC_DOWN64(a,c) ((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
147
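/*
 * Editor's worked example (not part of the original file): TRUNC_DOWN32/64
 * reserve at least 'c' bytes below 'a' and round the result down to a
 * 'c'-byte boundary, e.g. for the 16-byte stack alignment constants above:
 *
 *     TRUNC_DOWN32(0xB0001234, 16)
 *       = (0xB0001234 - 16) & (uint32_t)(-16)
 *       = 0xB0001224 & 0xFFFFFFF0
 *       = 0xB0001220
 */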
148
149 /*
150 * Flags field passed to bsdthread_create and back in pthread_start
151 31 <---------------------------------> 0
152 _________________________________________
153 | flags(8) | policy(8) | importance(16) |
154 -----------------------------------------
155 */
156 void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
157
158 #define PTHREAD_START_CUSTOM 0x01000000
159 #define PTHREAD_START_SETSCHED 0x02000000
160 #define PTHREAD_START_DETACHED 0x04000000
161 #define PTHREAD_START_POLICY_BITSHIFT 16
162 #define PTHREAD_START_POLICY_MASK 0xffff
163 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
164
165 #define SCHED_OTHER POLICY_TIMESHARE
166 #define SCHED_FIFO POLICY_FIFO
167 #define SCHED_RR POLICY_RR
168
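/*
 * Editor's illustrative sketch, not part of the original source: one way a
 * caller could pack the 32-bit flags word laid out in the comment above
 * (flag bits in the top byte, scheduling policy above bit 16, importance in
 * the low 16 bits).  The helper name is hypothetical.
 */
static inline unsigned int
pthread_start_pack_flags(unsigned int startflags, unsigned int policy, unsigned int importance)
{
	return (startflags |
	    ((policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT) |
	    (importance & PTHREAD_START_IMPORTANCE_MASK));
}
/* e.g. pthread_start_pack_flags(PTHREAD_START_SETSCHED, SCHED_RR, 10) */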
169 void
170 pthread_init(void)
171 {
172
173 pthread_lck_grp_attr = lck_grp_attr_alloc_init();
174 pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
175
176 /*
177 * allocate the lock attribute for pthread synchronizers
178 */
179 pthread_lck_attr = lck_attr_alloc_init();
180
181 pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
182
183 }
184
185 void
186 pthread_list_lock(void)
187 {
188 lck_mtx_lock(pthread_list_mlock);
189 }
190
191 void
192 pthread_list_unlock(void)
193 {
194 lck_mtx_unlock(pthread_list_mlock);
195 }
196
197
198 int
199 __pthread_mutex_destroy(__unused struct proc *p, struct __pthread_mutex_destroy_args *uap, __unused register_t *retval)
200 {
201 int res;
202 int mutexid = uap->mutexid;
203 pthread_mutex_t * mutex;
204 lck_mtx_t * lmtx;
205 lck_mtx_t * lmtx1;
206
207
208 mutex = pthread_id_to_mutex(mutexid);
209 if (mutex == 0)
210 return(EINVAL);
211
212 MTX_LOCK(mutex->lock);
213 if (mutex->sig == _PTHREAD_KERN_MUTEX_SIG)
214 {
215 if (mutex->owner == (thread_t)NULL &&
216 mutex->refcount == 1)
217 {
218 mutex->sig = _PTHREAD_NO_SIG;
219 lmtx = mutex->mutex;
220 lmtx1 = mutex->lock;
221 mutex->mutex = NULL;
222 pthread_id_mutex_remove(mutexid);
223 mutex->refcount --;
224 MTX_UNLOCK(mutex->lock);
225 lck_mtx_free(lmtx, pthread_lck_grp);
226 lck_mtx_free(lmtx1, pthread_lck_grp);
227 kfree((void *)mutex, sizeof(struct _pthread_mutex));
228 return(0);
229 }
230 else
231 res = EBUSY;
232 }
233 else
234 res = EINVAL;
235 MTX_UNLOCK(mutex->lock);
236 pthread_mutex_release(mutex);
237 return (res);
238 }
239
240 /*
241 * Initialize a mutex variable, possibly with additional attributes.
242 */
243 static void
244 pthread_mutex_init_internal(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
245 {
246 mutex->prioceiling = attr->prioceiling;
247 mutex->protocol = attr->protocol;
248 mutex->type = attr->type;
249 mutex->pshared = attr->pshared;
250 mutex->refcount = 0;
251 mutex->owner = (thread_t)NULL;
252 mutex->owner_proc = current_proc();
253 mutex->sig = _PTHREAD_KERN_MUTEX_SIG;
254 mutex->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
255 mutex->mutex = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
256 }
257
258 /*
259 * Initialize a mutex variable, possibly with additional attributes.
260 * Public interface - so don't trust the lock - initialize it first.
261 */
262 int
263 __pthread_mutex_init(__unused struct proc *p, struct __pthread_mutex_init_args *uap, __unused register_t *retval)
264 {
265 user_addr_t umutex = uap->mutex;
266 pthread_mutex_t * mutex;
267 user_addr_t uattr = uap->attr;
268 pthread_mutexattr_t attr;
269 unsigned int addr = (unsigned int)((uintptr_t)uap->mutex);
270 int pmutex_sig;
271 int mutexid;
272 int error = 0;
273
274 if ((umutex == 0) || (uattr == 0))
275 return(EINVAL);
276
277 if ((error = copyin(uattr, &attr, sizeof(pthread_mutexattr_t))))
278 return(error);
279
280 if (attr.sig != _PTHREAD_MUTEX_ATTR_SIG)
281 return (EINVAL);
282
283 if ((error = copyin(umutex, &pmutex_sig, sizeof(int))))
284 return(error);
285
286 if (pmutex_sig == _PTHREAD_KERN_MUTEX_SIG)
287 return(EBUSY);
288 mutex = (pthread_mutex_t *)kalloc(sizeof(pthread_mutex_t));
289
290 pthread_mutex_init_internal(mutex, &attr);
291
292
293 addr += 8;
294 mutexid = pthread_id_mutex_add(mutex);
295 if (mutexid) {
296 if ((error = copyout(&mutexid, ((user_addr_t)((uintptr_t)(addr))), 4)))
297 goto cleanup;
298 return(0);
299 } else
300 error = ENOMEM;
301 cleanup:
302 if(mutexid)
303 pthread_id_mutex_remove(mutexid);
304 lck_mtx_free(mutex->lock, pthread_lck_grp);
305 lck_mtx_free(mutex->mutex, pthread_lck_grp);
306 kfree(mutex, sizeof(struct _pthread_mutex));
307 return(error);
308 }
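/*
 * Editor's note on the addr += 8 above (the offset is taken from the code;
 * the user-level pthread_mutex_t layout itself is not defined in this file):
 * the kernel-assigned mutex id is copied out 8 bytes into the user's
 * pthread_mutex_t, and later calls pass that id back in as uap->mutexid.
 */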
309
310 /*
311 * Lock a mutex.
312 * TODO: Priority inheritance stuff
313 */
314 int
315 __pthread_mutex_lock(struct proc *p, struct __pthread_mutex_lock_args *uap, __unused register_t *retval)
316 {
317 int mutexid = uap->mutexid;
318 pthread_mutex_t * mutex;
319 int error;
320
321 mutex = pthread_id_to_mutex(mutexid);
322 if (mutex == 0)
323 return(EINVAL);
324
325 MTX_LOCK(mutex->lock);
326
327 if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
328 {
329 error = EINVAL;
330 goto out;
331 }
332
333 if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
334 error = EINVAL;
335 goto out;
336 }
337
338 MTX_UNLOCK(mutex->lock);
339
340 lck_mtx_lock(mutex->mutex);
341
342 MTX_LOCK(mutex->lock);
343 mutex->owner = current_thread();
344 error = 0;
345 out:
346 MTX_UNLOCK(mutex->lock);
347 pthread_mutex_release(mutex);
348 return (error);
349 }
350
351 /*
352 * Attempt to lock a mutex, but don't block if this isn't possible.
353 */
354 int
355 __pthread_mutex_trylock(struct proc *p, struct __pthread_mutex_trylock_args *uap, __unused register_t *retval)
356 {
357 int mutexid = uap->mutexid;
358 pthread_mutex_t * mutex;
359 boolean_t state;
360 int error;
361
362 mutex = pthread_id_to_mutex(mutexid);
363 if (mutex == 0)
364 return(EINVAL);
365
366 MTX_LOCK(mutex->lock);
367
368 if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
369 {
370 error = EINVAL;
371 goto out;
372 }
373
374 if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
375 error = EINVAL;
376 goto out;
377 }
378
379 MTX_UNLOCK(mutex->lock);
380
381 state = lck_mtx_try_lock(mutex->mutex);
382 if (state) {
383 MTX_LOCK(mutex->lock);
384 mutex->owner = current_thread();
385 MTX_UNLOCK(mutex->lock);
386 error = 0;
387 } else
388 error = EBUSY;
389
390 pthread_mutex_release(mutex);
391 return (error);
392 out:
393 MTX_UNLOCK(mutex->lock);
394 pthread_mutex_release(mutex);
395 return (error);
396 }
397
398 /*
399 * Unlock a mutex.
400 * TODO: Priority inheritance stuff
401 */
402 int
403 __pthread_mutex_unlock(struct proc *p, struct __pthread_mutex_unlock_args *uap, __unused register_t *retval)
404 {
405 int mutexid = uap->mutexid;
406 pthread_mutex_t * mutex;
407 int error;
408
409 mutex = pthread_id_to_mutex(mutexid);
410 if (mutex == 0)
411 return(EINVAL);
412
413 MTX_LOCK(mutex->lock);
414
415 if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
416 {
417 error = EINVAL;
418 goto out;
419 }
420
421 if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
422 error = EINVAL;
423 goto out;
424 }
425
426 MTX_UNLOCK(mutex->lock);
427
428 lck_mtx_unlock(mutex->mutex);
429
430 MTX_LOCK(mutex->lock);
431 mutex->owner = NULL;
432 error = 0;
433 out:
434 MTX_UNLOCK(mutex->lock);
435 pthread_mutex_release(mutex);
436 return (error);
437 }
438
439
440 int
441 __pthread_cond_init(__unused struct proc *p, struct __pthread_cond_init_args *uap, __unused register_t *retval)
442 {
443 pthread_cond_t * cond;
444 pthread_condattr_t attr;
445 user_addr_t ucond = uap->cond;
446 user_addr_t uattr = uap->attr;
447 unsigned int addr = (unsigned int)((uintptr_t)uap->cond);
448 int condid, error, cond_sig;
449 semaphore_t sem;
450 kern_return_t kret;
451 int value = 0;
452
453 if ((ucond == 0) || (uattr == 0))
454 return(EINVAL);
455
456 if ((error = copyin(uattr, &attr, sizeof(pthread_condattr_t))))
457 return(error);
458
459 if (attr.sig != _PTHREAD_COND_ATTR_SIG)
460 return (EINVAL);
461
462 if ((error = copyin(ucond, &cond_sig, sizeof(int))))
463 return(error);
464
465 if (cond_sig == _PTHREAD_KERN_COND_SIG)
466 return(EBUSY);
467 kret = semaphore_create(kernel_task, &sem, SYNC_POLICY_FIFO, value);
468 if (kret != KERN_SUCCESS)
469 return(ENOMEM);
470
471 cond = (pthread_cond_t *)kalloc(sizeof(pthread_cond_t));
472
473 cond->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
474 cond->pshared = attr.pshared;
475 cond->sig = _PTHREAD_KERN_COND_SIG;
476 cond->sigpending = 0;
477 cond->waiters = 0;
478 cond->refcount = 0;
479 cond->mutex = (pthread_mutex_t *)0;
480 cond->owner_proc = current_proc();
481 cond->sem = sem;
482
483 addr += 8;
484 condid = pthread_id_cond_add(cond);
485 if (condid) {
486 if ((error = copyout(&condid, ((user_addr_t)((uintptr_t)(addr))), 4)))
487 goto cleanup;
488 return(0);
489 } else
490 error = ENOMEM;
491 cleanup:
492 if(condid)
493 pthread_id_cond_remove(condid);
494 semaphore_destroy(kernel_task, cond->sem);
495 kfree(cond, sizeof(pthread_cond_t));
496 return(error);
497 }
498
499
500 /*
501 * Destroy a condition variable.
502 */
503 int
504 __pthread_cond_destroy(__unused struct proc *p, struct __pthread_cond_destroy_args *uap, __unused register_t *retval)
505 {
506 pthread_cond_t *cond;
507 int condid = uap->condid;
508 semaphore_t sem;
509 lck_mtx_t * lmtx;
510 int res;
511
512 cond = pthread_id_to_cond(condid);
513 if (cond == 0)
514 return(EINVAL);
515
516 COND_LOCK(cond->lock);
517 if (cond->sig == _PTHREAD_KERN_COND_SIG)
518 {
519 if (cond->refcount == 1)
520 {
521 cond->sig = _PTHREAD_NO_SIG;
522 sem = cond->sem;
523 cond->sem = NULL;
524 lmtx = cond->lock;
525 pthread_id_cond_remove(condid);
526 cond->refcount --;
527 COND_UNLOCK(cond->lock);
528 lck_mtx_free(lmtx, pthread_lck_grp);
529 (void)semaphore_destroy(kernel_task, sem);
530 kfree((void *)cond, sizeof(pthread_cond_t));
531 return(0);
532 }
533 else
534 res = EBUSY;
535 }
536 else
537 res = EINVAL;
538 COND_UNLOCK(cond->lock);
539 pthread_cond_release(cond);
540 return (res);
541 }
542
543
544 /*
545 * Signal a condition variable, waking up all threads waiting for it.
546 */
547 int
548 __pthread_cond_broadcast(__unused struct proc *p, struct __pthread_cond_broadcast_args *uap, __unused register_t *retval)
549 {
550 int condid = uap->condid;
551 pthread_cond_t * cond;
552 int error;
553 kern_return_t kret;
554
555 cond = pthread_id_to_cond(condid);
556 if (cond == 0)
557 return(EINVAL);
558
559 COND_LOCK(cond->lock);
560
561 if (cond->sig != _PTHREAD_KERN_COND_SIG)
562 {
563 error = EINVAL;
564 goto out;
565 }
566
567 if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
568 error = EINVAL;
569 goto out;
570 }
571
572 COND_UNLOCK(cond->lock);
573
574 kret = semaphore_signal_all(cond->sem);
575 switch (kret) {
576 case KERN_INVALID_ADDRESS:
577 case KERN_PROTECTION_FAILURE:
578 error = EINVAL;
579 break;
580 case KERN_ABORTED:
581 case KERN_OPERATION_TIMED_OUT:
582 error = EINTR;
583 break;
584 case KERN_SUCCESS:
585 error = 0;
586 break;
587 default:
588 error = EINVAL;
589 break;
590 }
591
592 COND_LOCK(cond->lock);
593 out:
594 COND_UNLOCK(cond->lock);
595 pthread_cond_release(cond);
596 return (error);
597 }
598
599
600 /*
601 * Signal a condition variable, waking only one thread.
602 */
603 int
604 __pthread_cond_signal(__unused struct proc *p, struct __pthread_cond_signal_args *uap, __unused register_t *retval)
605 {
606 int condid = uap->condid;
607 pthread_cond_t * cond;
608 int error;
609 kern_return_t kret;
610
611 cond = pthread_id_to_cond(condid);
612 if (cond == 0)
613 return(EINVAL);
614
615 COND_LOCK(cond->lock);
616
617 if (cond->sig != _PTHREAD_KERN_COND_SIG)
618 {
619 error = EINVAL;
620 goto out;
621 }
622
623 if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
624 error = EINVAL;
625 goto out;
626 }
627
628 COND_UNLOCK(cond->lock);
629
630 kret = semaphore_signal(cond->sem);
631 switch (kret) {
632 case KERN_INVALID_ADDRESS:
633 case KERN_PROTECTION_FAILURE:
634 error = EINVAL;
635 break;
636 case KERN_ABORTED:
637 case KERN_OPERATION_TIMED_OUT:
638 error = EINTR;
639 break;
640 case KERN_SUCCESS:
641 error = 0;
642 break;
643 default:
644 error = EINVAL;
645 break;
646 }
647
648 COND_LOCK(cond->lock);
649 out:
650 COND_UNLOCK(cond->lock);
651 pthread_cond_release(cond);
652 return (error);
653 }
654
655
656 int
657 __pthread_cond_wait(__unused struct proc *p, struct __pthread_cond_wait_args *uap, __unused register_t *retval)
658 {
659 int condid = uap->condid;
660 pthread_cond_t * cond;
661 int mutexid = uap->mutexid;
662 pthread_mutex_t * mutex;
663 int error;
664 kern_return_t kret;
665
666 cond = pthread_id_to_cond(condid);
667 if (cond == 0)
668 return(EINVAL);
669
670 mutex = pthread_id_to_mutex(mutexid);
671 if (mutex == 0) {
672 pthread_cond_release(cond);
673 return(EINVAL);
674 }
675 COND_LOCK(cond->lock);
676
677 if (cond->sig != _PTHREAD_KERN_COND_SIG)
678 {
679 error = EINVAL;
680 goto out;
681 }
682
683 if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
684 error = EINVAL;
685 goto out;
686 }
687
688 COND_UNLOCK(cond->lock);
689
690 kret = semaphore_wait(cond->sem);
691 switch (kret) {
692 case KERN_INVALID_ADDRESS:
693 case KERN_PROTECTION_FAILURE:
694 error = EACCES;
695 break;
696 case KERN_ABORTED:
697 case KERN_OPERATION_TIMED_OUT:
698 error = EINTR;
699 break;
700 case KERN_SUCCESS:
701 error = 0;
702 break;
703 default:
704 error = EINVAL;
705 break;
706 }
707
708 COND_LOCK(cond->lock);
709 out:
710 COND_UNLOCK(cond->lock);
711 pthread_cond_release(cond);
712 pthread_mutex_release(mutex);
713 return (error);
714 }
715
716 int
717 __pthread_cond_timedwait(__unused struct proc *p, struct __pthread_cond_timedwait_args *uap, __unused register_t *retval)
718 {
719 int condid = uap->condid;
720 pthread_cond_t * cond;
721 int mutexid = uap->mutexid;
722 pthread_mutex_t * mutex;
723 mach_timespec_t absts;
724 int error;
725 kern_return_t kret;
726
727 absts.tv_sec = 0;
728 absts.tv_nsec = 0;
729
730 if (uap->abstime)
731 if ((error = copyin(uap->abstime, &absts, sizeof(mach_timespec_t ))))
732 return(error);
733 cond = pthread_id_to_cond(condid);
734 if (cond == 0)
735 return(EINVAL);
736
737 mutex = pthread_id_to_mutex(mutexid);
738 if (mutex == 0) {
739 pthread_cond_release(cond);
740 return(EINVAL);
741 }
742 COND_LOCK(cond->lock);
743
744 if (cond->sig != _PTHREAD_KERN_COND_SIG)
745 {
746 error = EINVAL;
747 goto out;
748 }
749
750 if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
751 error = EINVAL;
752 goto out;
753 }
754
755 COND_UNLOCK(cond->lock);
756
757 kret = semaphore_timedwait(cond->sem, absts);
758 switch (kret) {
759 case KERN_INVALID_ADDRESS:
760 case KERN_PROTECTION_FAILURE:
761 error = EACCES;
762 break;
763 case KERN_ABORTED:
764 case KERN_OPERATION_TIMED_OUT:
765 error = EINTR;
766 break;
767 case KERN_SUCCESS:
768 error = 0;
769 break;
770 default:
771 error = EINVAL;
772 break;
773 }
774
775 COND_LOCK(cond->lock);
776 out:
777 COND_UNLOCK(cond->lock);
778 pthread_cond_release(cond);
779 pthread_mutex_release(mutex);
780 return (error);
781 }
782
783 int
784 bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval)
785 {
786 kern_return_t kret;
787 void * sright;
788 int error = 0;
789 int allocated = 0;
790 mach_vm_offset_t stackaddr;
791 mach_vm_size_t th_allocsize = 0;
792 mach_vm_size_t user_stacksize;
793 mach_vm_size_t th_stacksize;
794 mach_vm_offset_t th_stackaddr;
795 mach_vm_offset_t th_stack;
796 mach_vm_offset_t th_pthread;
797 mach_port_t th_thport;
798 thread_t th;
799 user_addr_t user_func = uap->func;
800 user_addr_t user_funcarg = uap->func_arg;
801 user_addr_t user_stack = uap->stack;
802 user_addr_t user_pthread = uap->pthread;
803 unsigned int flags = (unsigned int)uap->flags;
804 vm_map_t vmap = current_map();
805 task_t ctask = current_task();
806 unsigned int policy, importance;
807
808 int isLP64 = 0;
809
810
811 #if 0
812 KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0);
813 #endif
814
815 isLP64 = IS_64BIT_PROCESS(p);
816
817
818 #if defined(__ppc__)
819 stackaddr = 0xF0000000;
820 #elif defined(__i386__)
821 stackaddr = 0xB0000000;
822 #elif defined(__arm__)
823 stackaddr = 0xB0000000; /* XXX ARM */
824 #else
825 #error Need to define a stack address hint for this architecture
826 #endif
827 kret = thread_create(ctask, &th);
828 if (kret != KERN_SUCCESS)
829 return(ENOMEM);
830 thread_reference(th);
831
832 sright = (void *) convert_thread_to_port(th);
833 th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(ctask));
834
835 if ((flags & PTHREAD_START_CUSTOM) == 0) {
836 th_stacksize = (mach_vm_size_t)user_stack; /* when PTHREAD_START_CUSTOM is not set, user_stack holds the requested stack size */
837 th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;
838
839 kret = mach_vm_map(vmap, &stackaddr,
840 th_allocsize,
841 page_size-1,
842 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
843 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
844 VM_INHERIT_DEFAULT);
845 if (kret != KERN_SUCCESS)
846 kret = mach_vm_allocate(vmap,
847 &stackaddr, th_allocsize,
848 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
849 if (kret != KERN_SUCCESS) {
850 error = ENOMEM;
851 goto out;
852 }
853 #if 0
854 KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
855 #endif
856 th_stackaddr = stackaddr;
857 allocated = 1;
858 /*
859 * The guard page is at the lowest address
860 * The stack base is the highest address
861 */
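/*
 * Editor's sketch of the resulting mapping, lowest address first (sizes as
 * computed above):
 *
 *   [stackaddr, stackaddr + PTH_DEFAULT_GUARDSIZE)   guard, VM_PROT_NONE
 *   [guard end, guard end + th_stacksize)            thread stack, grows down
 *                                                    from th_stack
 *   [th_pthread, th_pthread + p->p_pthsize)          user pthread_t area
 */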
862 kret = mach_vm_protect(vmap, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);
863
864 if (kret != KERN_SUCCESS) {
865 error = ENOMEM;
866 goto out1;
867 }
868 th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
869 th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
870 user_stacksize = th_stacksize;
871 } else {
872 th_stack = user_stack;
873 user_stacksize = user_stack;
874 th_pthread = user_pthread;
875 #if 0
876 KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, 0, 0, 0, 3, 0);
877 #endif
878 }
879
880 #if defined(__ppc__)
881 /*
882 * Set up PowerPC registers...
883 * internally they are always kept as 64 bit and
884 * since the register set is the same between 32 and 64bit modes
885 * we don't need 2 different methods for setting the state
886 */
887 {
888 ppc_thread_state64_t state64;
889 ppc_thread_state64_t *ts64 = &state64;
890
891 ts64->srr0 = (uint64_t)p->p_threadstart;
892 ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE);
893 ts64->r3 = (uint64_t)th_pthread;
894 ts64->r4 = (uint64_t)((unsigned int)th_thport);
895 ts64->r5 = (uint64_t)user_func;
896 ts64->r6 = (uint64_t)user_funcarg;
897 ts64->r7 = (uint64_t)user_stacksize;
898 ts64->r8 = (uint64_t)uap->flags;
899
900 thread_set_wq_state64(th, (thread_state_t)ts64);
901
902 thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64);
903 }
904 #elif defined(__i386__)
905 {
906 /*
907 * Set up i386 registers & function call.
908 */
909 if (isLP64 == 0) {
910 x86_thread_state32_t state;
911 x86_thread_state32_t *ts = &state;
912
913 ts->eip = (int)p->p_threadstart;
914 ts->eax = (unsigned int)th_pthread;
915 ts->ebx = (unsigned int)th_thport;
916 ts->ecx = (unsigned int)user_func;
917 ts->edx = (unsigned int)user_funcarg;
918 ts->edi = (unsigned int)user_stacksize;
919 ts->esi = (unsigned int)uap->flags;
920 /*
921 * set stack pointer
922 */
923 ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));
924
925 thread_set_wq_state32(th, (thread_state_t)ts);
926
927 } else {
928 x86_thread_state64_t state64;
929 x86_thread_state64_t *ts64 = &state64;
930
931 ts64->rip = (uint64_t)p->p_threadstart;
932 ts64->rdi = (uint64_t)th_pthread;
933 ts64->rsi = (uint64_t)((unsigned int)(th_thport));
934 ts64->rdx = (uint64_t)user_func;
935 ts64->rcx = (uint64_t)user_funcarg;
936 ts64->r8 = (uint64_t)user_stacksize;
937 ts64->r9 = (uint64_t)uap->flags;
938 /*
939 * set stack pointer aligned to 16 byte boundary
940 */
941 ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);
942
943 thread_set_wq_state64(th, (thread_state_t)ts64);
944 }
945 }
946 #elif defined(__arm__)
947 {
948 int flavor=0, count=0;
949 void * state;
950
951 kret = thread_getstatus(th, flavor, (thread_state_t)&state, &count);
952 if (kret != KERN_SUCCESS) {
953 error = EINVAL;
954 goto out1;
955 }
956
957 /* XXX ARM TODO */
958
959 kret = thread_setstatus(th, flavor, (thread_state_t)&state, count);
960 if (kret != KERN_SUCCESS) {
961 error = EINVAL; goto out1;
962 }
963 }
964 #else
965 #error bsdthread_create not defined for this architecture
966 #endif
967 /* Set scheduling parameters if needed */
968 if ((flags & PTHREAD_START_SETSCHED) != 0) {
969 thread_extended_policy_data_t extinfo;
970 thread_precedence_policy_data_t precedinfo;
971
972 importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
973 policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
974
975 if (policy == SCHED_OTHER)
976 extinfo.timeshare = 1;
977 else
978 extinfo.timeshare = 0;
979 thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
980
981 precedinfo.importance = importance;
982 thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
983 }
984
985 kret = thread_resume(th);
986 if (kret != KERN_SUCCESS) {
987 error = EINVAL;
988 goto out1;
989 }
990 thread_deallocate(th); /* drop the creator reference */
991 #if 0
992 KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, (unsigned int)th_pthread, 0, 0, 0);
993 #endif
994 *retval = th_pthread;
995
996 return(0);
997
998 out1:
999 if (allocated != 0)
1000 (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
1001 out:
1002 (void)mach_port_deallocate(get_task_ipcspace(ctask), (mach_port_name_t)th_thport);
1003 (void)thread_terminate(th);
1004 (void)thread_deallocate(th);
1005 return(error);
1006 }
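/*
 * Editor's note, grounded in the register setup above and the _pthread_start()
 * prototype near the top of this file: the new thread resumes in user space at
 * p->p_threadstart (registered via bsdthread_register) with the argument order
 *
 *   _pthread_start(th_pthread, th_thport, user_func, user_funcarg,
 *                  user_stacksize, uap->flags);
 */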
1007
1008 int
1009 bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused register_t *retval)
1010 {
1011 mach_vm_offset_t freeaddr;
1012 mach_vm_size_t freesize;
1013 kern_return_t kret;
1014 mach_port_name_t kthport = (mach_port_name_t)uap->port;
1015 mach_port_name_t sem = (mach_port_name_t)uap->sem;
1016
1017 freeaddr = (mach_vm_offset_t)uap->stackaddr;
1018 freesize = uap->freesize;
1019
1020 #if 0
1021 KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, (unsigned int)freeaddr, (unsigned int)freesize, (unsigned int)kthport, 0xff, 0);
1022 #endif
1023 if (sem != MACH_PORT_NULL) {
1024 kret = semaphore_signal_internal_trap(sem);
1025 if (kret != KERN_SUCCESS) {
1026 return(EINVAL);
1027 }
1028 }
1029 if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
1030 kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
1031 if (kret != KERN_SUCCESS) {
1032 return(EINVAL);
1033 }
1034 }
1035
1036 (void) thread_terminate(current_thread());
1037 if (kthport != MACH_PORT_NULL)
1038 mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
1039 thread_exception_return();
1040 panic("bsdthread_terminate: still running\n");
1041 #if 0
1042 KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_END, 0, 0, 0, 0xff, 0);
1043 #endif
1044 return(0);
1045 }
1046
1047
1048 int
1049 bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused register_t *retval)
1050 {
1051 /* syscall randomizer test can pass bogus values */
1052 if (uap->pthsize > MAX_PTHREAD_SIZE) {
1053 return(EINVAL);
1054 }
1055 p->p_threadstart = uap->threadstart;
1056 p->p_wqthread = uap->wqthread;
1057 p->p_pthsize = uap->pthsize;
1058
1059 return(0);
1060 }
1061
1062
1063
1064
1065 int wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS;
1066 int wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS;
1067 int wq_max_run_latency_usecs = WQ_MAX_RUN_LATENCY_USECS;
1068 int wq_timer_interval_msecs = WQ_TIMER_INTERVAL_MSECS;
1069
1070
1071 SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
1072 &wq_stalled_window_usecs, 0, "");
1073
1074 SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
1075 &wq_reduce_pool_window_usecs, 0, "");
1076
1077 SYSCTL_INT(_kern, OID_AUTO, wq_max_run_latency_usecs, CTLFLAG_RW,
1078 &wq_max_run_latency_usecs, 0, "");
1079
1080 SYSCTL_INT(_kern, OID_AUTO, wq_timer_interval_msecs, CTLFLAG_RW,
1081 &wq_timer_interval_msecs, 0, "");
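/*
 * Editor's sketch (assumes the standard sysctlbyname(3) interface; not part
 * of the original file) of how these tunables appear from user space:
 *
 *   int usecs;
 *   size_t len = sizeof(usecs);
 *   sysctlbyname("kern.wq_stalled_window_usecs", &usecs, &len, NULL, 0);
 *   usecs *= 2;
 *   sysctlbyname("kern.wq_stalled_window_usecs", NULL, NULL, &usecs, sizeof(usecs));
 */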
1082
1083
1084
1085
1086 void
1087 workqueue_init_lock(proc_t p)
1088 {
1089 lck_mtx_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr);
1090 }
1091
1092 void
1093 workqueue_destroy_lock(proc_t p)
1094 {
1095 lck_mtx_destroy(&p->p_wqlock, pthread_lck_grp);
1096 }
1097
1098 static void
1099 workqueue_lock(proc_t p)
1100 {
1101 lck_mtx_lock(&p->p_wqlock);
1102 }
1103
1104 static void
1105 workqueue_lock_spin(proc_t p)
1106 {
1107 lck_mtx_lock_spin(&p->p_wqlock);
1108 }
1109
1110 static void
1111 workqueue_unlock(proc_t p)
1112 {
1113 lck_mtx_unlock(&p->p_wqlock);
1114 }
1115
1116
1117
1118 static void
1119 workqueue_interval_timer_start(thread_call_t call, int interval_in_ms)
1120 {
1121 uint64_t deadline;
1122
1123 clock_interval_to_deadline(interval_in_ms, 1000 * 1000, &deadline);
1124
1125 thread_call_enter_delayed(call, deadline);
1126 }
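/*
 * Editor's note: the scale factor above is nanoseconds per millisecond
 * (1000 * 1000), so for example interval_in_ms == 40 produces a deadline
 * 40 ms from now in absolute time units.
 */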
1127
1128
1129 static void
1130 workqueue_timer(struct workqueue *wq, __unused int param1)
1131 {
1132 struct timeval tv, dtv;
1133 uint32_t i;
1134 boolean_t added_more_threads = FALSE;
1135 boolean_t reset_maxactive = FALSE;
1136 boolean_t restart_timer = FALSE;
1137
1138 microuptime(&tv);
1139
1140 KERNEL_DEBUG(0xefffd108, (int)wq, 0, 0, 0, 0);
1141
1142 /*
1143 * check to see if the stall frequency was beyond our tolerance
1144 * or we have work on the queue, but haven't scheduled any
1145 * new work within our acceptable time interval because
1146 * there were no idle threads left to schedule
1147 *
1148 * WQ_TIMER_WATCH will only be set if we have 1 or more affinity
1149 * groups that have stalled (no active threads and no idle threads)...
1150 * it will not be set if all affinity groups have at least 1 thread
1151 * that is currently runnable... if all processors have a runnable
1152 * thread, there is no need to add more threads even if we're not
1153 * scheduling new work within our allowed window... it just means
1154 * that the work items are taking a long time to complete.
1155 */
1156 if (wq->wq_flags & (WQ_ADD_TO_POOL | WQ_TIMER_WATCH)) {
1157
1158 if (wq->wq_flags & WQ_ADD_TO_POOL)
1159 added_more_threads = TRUE;
1160 else {
1161 timersub(&tv, &wq->wq_lastran_ts, &dtv);
1162
1163 if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_stalled_window_usecs)
1164 added_more_threads = TRUE;
1165 }
1166 if (added_more_threads == TRUE) {
1167 for (i = 0; i < wq->wq_affinity_max && wq->wq_nthreads < WORKQUEUE_MAXTHREADS; i++) {
1168 (void)workqueue_addnewthread(wq);
1169 }
1170 }
1171 }
1172 timersub(&tv, &wq->wq_reduce_ts, &dtv);
1173
1174 if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_reduce_pool_window_usecs)
1175 reset_maxactive = TRUE;
1176
1177 /*
1178 * if the pool size has grown beyond the minimum number
1179 * of threads needed to keep all of the processors busy, and
1180 * the maximum number of threads scheduled concurrently during
1181 * the last sample period didn't exceed half the current pool
1182 * size, then it's time to trim the pool size back
1183 */
1184 if (added_more_threads == FALSE &&
1185 reset_maxactive == TRUE &&
1186 wq->wq_nthreads > wq->wq_affinity_max &&
1187 wq->wq_max_threads_scheduled <= (wq->wq_nthreads / 2)) {
1188 uint32_t nthreads_to_remove;
1189
1190 if ((nthreads_to_remove = (wq->wq_nthreads / 4)) == 0)
1191 nthreads_to_remove = 1;
1192
1193 for (i = 0; i < nthreads_to_remove && wq->wq_nthreads > wq->wq_affinity_max; i++)
1194 workqueue_removethread(wq);
1195 }
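/*
 * Editor's worked example of the trim above: on a 4-way machine
 * (wq_affinity_max == 4) with wq_nthreads == 16 and at most 8 threads
 * scheduled concurrently during the last window, nthreads_to_remove is
 * 16 / 4 == 4, so up to 4 idle threads are culled, never dropping the pool
 * below wq_affinity_max.
 */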
1196 workqueue_lock_spin(wq->wq_proc);
1197
1198 if (reset_maxactive == TRUE) {
1199 wq->wq_max_threads_scheduled = 0;
1200 microuptime(&wq->wq_reduce_ts);
1201 }
1202 if (added_more_threads) {
1203 wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);
1204
1205 /*
1206 * since we added more threads, we should be
1207 * able to run some work if it's still available
1208 */
1209 workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
1210 workqueue_lock_spin(wq->wq_proc);
1211 }
1212 if ((wq->wq_nthreads > wq->wq_affinity_max) ||
1213 (wq->wq_flags & WQ_TIMER_WATCH)) {
1214 restart_timer = TRUE;
1215 } else
1216 wq->wq_flags &= ~WQ_TIMER_RUNNING;
1217
1218 workqueue_unlock(wq->wq_proc);
1219
1220 /*
1221 * we needed to knock down the WQ_TIMER_RUNNING flag while behind
1222 * the workqueue lock... however, we don't want to hold the lock
1223 * while restarting the timer and we certainly don't want 2 or more
1224 * instances of the timer... so set a local to indicate the need
1225 * for a restart since the state of wq_flags may change once we
1226 * drop the workqueue lock...
1227 */
1228 if (restart_timer == TRUE)
1229 workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
1230 }
1231
1232
1233 static void
1234 workqueue_callback(
1235 int type,
1236 thread_t thread)
1237 {
1238 struct uthread *uth;
1239 struct threadlist *tl;
1240 struct workqueue *wq;
1241
1242 uth = get_bsdthread_info(thread);
1243 tl = uth->uu_threadlist;
1244 wq = tl->th_workq;
1245
1246 switch (type) {
1247
1248 case SCHED_CALL_BLOCK:
1249 {
1250 uint32_t old_activecount;
1251
1252 old_activecount = OSAddAtomic(-1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
1253
1254 if (old_activecount == 1 && wq->wq_itemcount) {
1255 /*
1256 * we were the last active thread on this affinity set
1257 * and we've got work to do
1258 */
1259 workqueue_lock_spin(wq->wq_proc);
1260 /*
1261 * if this thread is blocking (not parking)
1262 * and the idle list is empty for this affinity group
1263 * we'll count it as a 'stall'
1264 */
1265 if ((tl->th_flags & TH_LIST_RUNNING) &&
1266 TAILQ_EMPTY(&wq->wq_thidlelist[tl->th_affinity_tag]))
1267 wq->wq_stalled_count++;
1268
1269 workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
1270 /*
1271 * workqueue_run_nextitem will drop the workqueue
1272 * lock before it returns
1273 */
1274 }
1275 KERNEL_DEBUG(0xefffd020, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
1276 }
1277 break;
1278
1279 case SCHED_CALL_UNBLOCK:
1280 /*
1281 * we cannot take the workqueue_lock here...
1282 * an UNBLOCK can occur from a timer event which
1283 * is run from an interrupt context... if the workqueue_lock
1284 * is already held by this processor, we'll deadlock...
1285 * the thread lock for the thread being UNBLOCKED
1286 * is also held
1287 */
1288 if (tl->th_unparked)
1289 OSAddAtomic(-1, (SInt32 *)&tl->th_unparked);
1290 else
1291 OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
1292
1293 KERNEL_DEBUG(0xefffd024, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
1294 break;
1295 }
1296 }
1297
1298 static void
1299 workqueue_removethread(struct workqueue *wq)
1300 {
1301 struct threadlist *tl;
1302 uint32_t i, affinity_tag = 0;
1303
1304 tl = NULL;
1305
1306 workqueue_lock_spin(wq->wq_proc);
1307
1308 for (i = 0; i < wq->wq_affinity_max; i++) {
1309
1310 affinity_tag = wq->wq_nextaffinitytag;
1311
1312 if (affinity_tag == 0)
1313 affinity_tag = wq->wq_affinity_max - 1;
1314 else
1315 affinity_tag--;
1316 wq->wq_nextaffinitytag = affinity_tag;
1317
1318 /*
1319 * look for an idle thread to steal from this affinity group
1320 * but don't grab the only thread associated with it
1321 */
1322 if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]) && wq->wq_thcount[affinity_tag] > 1) {
1323 tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
1324 TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);
1325
1326 wq->wq_nthreads--;
1327 wq->wq_thcount[affinity_tag]--;
1328
1329 break;
1330 }
1331 }
1332 workqueue_unlock(wq->wq_proc);
1333
1334 if (tl != NULL) {
1335 thread_sched_call(tl->th_thread, NULL);
1336
1337 if ( (tl->th_flags & TH_LIST_BLOCKED) )
1338 wakeup(tl);
1339 else {
1340 /*
1341 * thread was created, but never used...
1342 * need to clean up the stack and port ourselves
1343 * since we're not going to spin up through the
1344 * normal exit path triggered from Libc
1345 */
1346 (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
1347 (void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), (mach_port_name_t)tl->th_thport);
1348
1349 thread_terminate(tl->th_thread);
1350 }
1351 KERNEL_DEBUG(0xefffd030, (int)tl->th_thread, wq->wq_nthreads, tl->th_flags & TH_LIST_BLOCKED, 0, 0);
1352 /*
1353 * drop our ref on the thread
1354 */
1355 thread_deallocate(tl->th_thread);
1356
1357 kfree(tl, sizeof(struct threadlist));
1358 }
1359 }
1360
1361
1362 static int
1363 workqueue_addnewthread(struct workqueue *wq)
1364 {
1365 struct threadlist *tl;
1366 struct uthread *uth;
1367 kern_return_t kret;
1368 thread_t th;
1369 proc_t p;
1370 void *sright;
1371 mach_vm_offset_t stackaddr;
1372 uint32_t affinity_tag;
1373
1374 p = wq->wq_proc;
1375
1376 kret = thread_create(wq->wq_task, &th);
1377
1378 if (kret != KERN_SUCCESS)
1379 return(EINVAL);
1380
1381 tl = kalloc(sizeof(struct threadlist));
1382 bzero(tl, sizeof(struct threadlist));
1383
1384 #if defined(__ppc__)
1385 stackaddr = 0xF0000000;
1386 #elif defined(__i386__)
1387 stackaddr = 0xB0000000;
1388 #elif defined(__arm__)
1389 stackaddr = 0xB0000000; /* XXX ARM */
1390 #else
1391 #error Need to define a stack address hint for this architecture
1392 #endif
1393 tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;
1394
1395 kret = mach_vm_map(wq->wq_map, &stackaddr,
1396 tl->th_allocsize,
1397 page_size-1,
1398 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
1399 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
1400 VM_INHERIT_DEFAULT);
1401
1402 if (kret != KERN_SUCCESS) {
1403 kret = mach_vm_allocate(wq->wq_map,
1404 &stackaddr, tl->th_allocsize,
1405 VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
1406 }
1407 if (kret == KERN_SUCCESS) {
1408 /*
1409 * The guard page is at the lowest address
1410 * The stack base is the highest address
1411 */
1412 kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);
1413
1414 if (kret != KERN_SUCCESS)
1415 (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
1416 }
1417 if (kret != KERN_SUCCESS) {
1418 (void) thread_terminate(th);
1419
1420 kfree(tl, sizeof(struct threadlist));
1421
1422 return(EINVAL);
1423 }
1424 thread_reference(th);
1425
1426 sright = (void *) convert_thread_to_port(th);
1427 tl->th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task));
1428
1429 thread_static_param(th, TRUE);
1430
1431 workqueue_lock_spin(p);
1432
1433 affinity_tag = wq->wq_nextaffinitytag;
1434 wq->wq_nextaffinitytag = (affinity_tag + 1) % wq->wq_affinity_max;
1435
1436 workqueue_unlock(p);
1437
1438 tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;
1439
1440 tl->th_thread = th;
1441 tl->th_workq = wq;
1442 tl->th_stackaddr = stackaddr;
1443 tl->th_affinity_tag = affinity_tag;
1444
1445 #if defined(__ppc__)
1446 //ml_fp_setvalid(FALSE);
1447 thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p));
1448 #endif /* __ppc__ */
1449 /*
1450 * affinity tag of 0 means no affinity...
1451 * but we want our tags to be 0 based because they
1452 * are used to index arrays, so...
1453 * keep it 0 based internally and bump by 1 when
1454 * calling out to set it
1455 */
1456 (void)thread_affinity_set(th, affinity_tag + 1);
1457 thread_sched_call(th, workqueue_callback);
1458
1459 uth = get_bsdthread_info(tl->th_thread);
1460 uth->uu_threadlist = (void *)tl;
1461
1462 workqueue_lock_spin(p);
1463
1464 TAILQ_INSERT_TAIL(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);
1465 wq->wq_nthreads++;
1466 wq->wq_thcount[affinity_tag]++;
1467
1468 KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, (int)current_thread(), affinity_tag, wq->wq_nthreads, 0, (int)tl->th_thread);
1469
1470 /*
1471 * work may have come into the queue while
1472 * no threads were available to run... since
1473 * we're adding a new thread, go evaluate the
1474 * current state
1475 */
1476 workqueue_run_nextitem(p, THREAD_NULL);
1477 /*
1478 * workqueue_run_nextitem is responsible for
1479 * dropping the workqueue lock in all cases
1480 */
1481
1482 return(0);
1483 }
1484
1485 int
1486 workq_open(__unused struct proc *p, __unused struct workq_open_args *uap, __unused register_t *retval)
1487 {
1488 struct workqueue * wq;
1489 int size;
1490 char * ptr;
1491 int j;
1492 uint32_t i;
1493 int error = 0;
1494 int num_cpus;
1495 struct workitem * witem;
1496 struct workitemlist *wl;
1497
1498 workqueue_lock(p);
1499
1500 if (p->p_wqptr == NULL) {
1501 num_cpus = ml_get_max_cpus();
1502
1503 size = (sizeof(struct workqueue)) +
1504 (num_cpus * sizeof(int *)) +
1505 (num_cpus * sizeof(TAILQ_HEAD(, threadlist)));
1506
1507 ptr = (char *)kalloc(size);
1508 bzero(ptr, size);
1509
1510 wq = (struct workqueue *)ptr;
1511 wq->wq_flags = WQ_LIST_INITED;
1512 wq->wq_proc = p;
1513 wq->wq_affinity_max = num_cpus;
1514 wq->wq_task = current_task();
1515 wq->wq_map = current_map();
1516
1517 for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
1518 wl = (struct workitemlist *)&wq->wq_list[i];
1519 TAILQ_INIT(&wl->wl_itemlist);
1520 TAILQ_INIT(&wl->wl_freelist);
1521
1522 for (j = 0; j < WORKITEM_SIZE; j++) {
1523 witem = &wq->wq_array[(i*WORKITEM_SIZE) + j];
1524 TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
1525 }
1526 }
1527 wq->wq_thactivecount = (uint32_t *)((char *)ptr + sizeof(struct workqueue));
1528 wq->wq_thcount = (uint32_t *)&wq->wq_thactivecount[wq->wq_affinity_max];
1529 wq->wq_thidlelist = (struct wq_thidlelist *)&wq->wq_thcount[wq->wq_affinity_max];
1530
1531 for (i = 0; i < wq->wq_affinity_max; i++)
1532 TAILQ_INIT(&wq->wq_thidlelist[i]);
1533
1534 TAILQ_INIT(&wq->wq_thrunlist);
1535
1536 p->p_wqptr = (void *)wq;
1537 p->p_wqsize = size;
1538
1539 workqueue_unlock(p);
1540
1541 wq->wq_timer_call = thread_call_allocate((thread_call_func_t)workqueue_timer, (thread_call_param_t)wq);
1542
1543 for (i = 0; i < wq->wq_affinity_max; i++) {
1544 (void)workqueue_addnewthread(wq);
1545 }
1546 /* If unable to create any threads, return error */
1547 if (wq->wq_nthreads == 0)
1548 error = EINVAL;
1549 workqueue_lock_spin(p);
1550
1551 microuptime(&wq->wq_reduce_ts);
1552 microuptime(&wq->wq_lastran_ts);
1553 wq->wq_max_threads_scheduled = 0;
1554 wq->wq_stalled_count = 0;
1555 }
1556 workqueue_unlock(p);
1557
1558 return(error);
1559 }
1560
1561 int
1562 workq_ops(struct proc *p, struct workq_ops_args *uap, __unused register_t *retval)
1563 {
1564 int options = uap->options;
1565 int prio = uap->prio; /* should be used to find the right workqueue */
1566 user_addr_t item = uap->item;
1567 int error = 0;
1568 thread_t th = THREAD_NULL;
1569 struct workqueue *wq;
1570
1571 prio += 2; /* normalize prio from -2..+2 to the 0..4 list index */
1572
1573 switch (options) {
1574
1575 case WQOPS_QUEUE_ADD: {
1576
1577 KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, (int)item, 0, 0, 0, 0);
1578
1579 workqueue_lock_spin(p);
1580
1581 if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
1582 workqueue_unlock(p);
1583 return (EINVAL);
1584 }
1585 error = workqueue_additem(wq, prio, item);
1586
1587 }
1588 break;
1589 case WQOPS_QUEUE_REMOVE: {
1590
1591 workqueue_lock_spin(p);
1592
1593 if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
1594 workqueue_unlock(p);
1595 return (EINVAL);
1596 }
1597 error = workqueue_removeitem(wq, prio, item);
1598 }
1599 break;
1600 case WQOPS_THREAD_RETURN: {
1601
1602 th = current_thread();
1603
1604 KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, 0, 0, 0, 0, 0);
1605
1606 workqueue_lock_spin(p);
1607
1608 if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
1609 workqueue_unlock(p);
1610 return (EINVAL);
1611 }
1612 }
1613 break;
1614 default:
1615 return (EINVAL);
1616 }
1617 workqueue_run_nextitem(p, th);
1618 /*
1619 * workqueue_run_nextitem is responsible for
1620 * dropping the workqueue lock in all cases
1621 */
1622 return(error);
1623 }
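/*
 * Editor's sketch of the three operations accepted above (the user-space
 * wrapper name is hypothetical; 'prio' is passed in the -2..+2 range and
 * normalized to a 0..4 list index by this function):
 *
 *   workq_ops(WQOPS_QUEUE_ADD,     0, item);   // enqueue a work item
 *   workq_ops(WQOPS_QUEUE_REMOVE,  0, item);   // cancel it if not yet run
 *   workq_ops(WQOPS_THREAD_RETURN, 0, 0);      // worker finished its item
 */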
1624
1625 void
1626 workqueue_exit(struct proc *p)
1627 {
1628 struct workqueue * wq;
1629 struct threadlist * tl, *tlist;
1630 uint32_t i;
1631
1632 if (p->p_wqptr != NULL) {
1633
1634 workqueue_lock_spin(p);
1635
1636 wq = (struct workqueue *)p->p_wqptr;
1637 p->p_wqptr = NULL;
1638
1639 workqueue_unlock(p);
1640
1641 if (wq == NULL)
1642 return;
1643
1644 if (wq->wq_flags & WQ_TIMER_RUNNING)
1645 thread_call_cancel(wq->wq_timer_call);
1646 thread_call_free(wq->wq_timer_call);
1647
1648 TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
1649 /*
1650 * drop our last ref on the thread
1651 */
1652 thread_sched_call(tl->th_thread, NULL);
1653 thread_deallocate(tl->th_thread);
1654
1655 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1656 kfree(tl, sizeof(struct threadlist));
1657 }
1658 for (i = 0; i < wq->wq_affinity_max; i++) {
1659 TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist[i], th_entry, tlist) {
1660 /*
1661 * drop our last ref on the thread
1662 */
1663 thread_sched_call(tl->th_thread, NULL);
1664 thread_deallocate(tl->th_thread);
1665
1666 TAILQ_REMOVE(&wq->wq_thidlelist[i], tl, th_entry);
1667 kfree(tl, sizeof(struct threadlist));
1668 }
1669 }
1670 kfree(wq, p->p_wqsize);
1671 }
1672 }
1673
1674 static int
1675 workqueue_additem(struct workqueue *wq, int prio, user_addr_t item)
1676 {
1677 struct workitem *witem;
1678 struct workitemlist *wl;
1679
1680 wl = (struct workitemlist *)&wq->wq_list[prio];
1681
1682 if (TAILQ_EMPTY(&wl->wl_freelist))
1683 return (ENOMEM);
1684
1685 witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
1686 TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);
1687
1688 witem->wi_item = item;
1689 TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);
1690
1691 if (wq->wq_itemcount == 0) {
1692 microuptime(&wq->wq_lastran_ts);
1693 wq->wq_stalled_count = 0;
1694 }
1695 wq->wq_itemcount++;
1696
1697 return (0);
1698 }
1699
1700 static int
1701 workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
1702 {
1703 struct workitem *witem;
1704 struct workitemlist *wl;
1705 int error = ESRCH;
1706
1707 wl = (struct workitemlist *)&wq->wq_list[prio];
1708
1709 TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
1710 if (witem->wi_item == item) {
1711 TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
1712 wq->wq_itemcount--;
1713
1714 witem->wi_item = (user_addr_t)0;
1715 TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
1716
1717 error = 0;
1718 break;
1719 }
1720 }
1721 if (wq->wq_itemcount == 0)
1722 wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);
1723
1724 return (error);
1725 }
1726
1727 /*
1728 * workqueue_run_nextitem:
1729 * called with the workqueue lock held...
1730 * responsible for dropping it in all cases
1731 */
1732 static void
1733 workqueue_run_nextitem(proc_t p, thread_t thread)
1734 {
1735 struct workqueue *wq;
1736 struct workitem *witem = NULL;
1737 user_addr_t item = 0;
1738 thread_t th_to_run = THREAD_NULL;
1739 thread_t th_to_park = THREAD_NULL;
1740 int wake_thread = 0;
1741 int reuse_thread = 1;
1742 uint32_t stalled_affinity_count = 0;
1743 int i;
1744 uint32_t affinity_tag;
1745 struct threadlist *tl = NULL;
1746 struct uthread *uth = NULL;
1747 struct workitemlist *wl;
1748 boolean_t start_timer = FALSE;
1749 struct timeval tv, lat_tv;
1750
1751 wq = (struct workqueue *)p->p_wqptr;
1752
1753 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, (int)thread, wq->wq_threads_scheduled, wq->wq_stalled_count, 0, 0);
1754
1755 if (wq->wq_itemcount == 0) {
1756 if ((th_to_park = thread) == THREAD_NULL)
1757 goto out;
1758 goto parkit;
1759 }
1760 if (thread != THREAD_NULL) {
1761 /*
1762 * we're a worker thread from the pool... currently we
1763 * are considered 'active' which means we're counted
1764 * in "wq_thactivecount"
1765 */
1766 uth = get_bsdthread_info(thread);
1767 tl = uth->uu_threadlist;
1768
1769 if (wq->wq_thactivecount[tl->th_affinity_tag] == 1) {
1770 /*
1771 * we're the only active thread associated with our
1772 * affinity group, so pick up some work and keep going
1773 */
1774 th_to_run = thread;
1775 goto pick_up_work;
1776 }
1777 }
1778 for (affinity_tag = 0; affinity_tag < wq->wq_affinity_max; affinity_tag++) {
1779 /*
1780 * look for first affinity group that is currently not active
1781 * and has at least 1 idle thread
1782 */
1783 if (wq->wq_thactivecount[affinity_tag] == 0) {
1784 if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]))
1785 break;
1786 stalled_affinity_count++;
1787 }
1788 }
1789 if (thread == THREAD_NULL) {
1790 /*
1791 * we're not one of the 'worker' threads
1792 */
1793 if (affinity_tag >= wq->wq_affinity_max) {
1794 /*
1795 * we've already got at least 1 thread per
1796 * affinity group in the active state... or
1797 * we've got no idle threads to play with
1798 */
1799 if (stalled_affinity_count) {
1800
1801 if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
1802 wq->wq_flags |= WQ_TIMER_RUNNING;
1803 start_timer = TRUE;
1804 }
1805 wq->wq_flags |= WQ_TIMER_WATCH;
1806 }
1807 goto out;
1808 }
1809 } else {
1810 /*
1811 * we're overbooked on the affinity group we're associated with,
1812 * so park this thread
1813 */
1814 th_to_park = thread;
1815
1816 if (affinity_tag >= wq->wq_affinity_max) {
1817 /*
1818 * all the affinity groups have active threads
1819 * running, or there are no idle threads to
1820 * schedule
1821 */
1822 if (stalled_affinity_count) {
1823
1824 if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
1825 wq->wq_flags |= WQ_TIMER_RUNNING;
1826 start_timer = TRUE;
1827 }
1828 wq->wq_flags |= WQ_TIMER_WATCH;
1829 }
1830 goto parkit;
1831 }
1832 /*
1833 * we've got a candidate (affinity group with no currently
1834 * active threads) to start a new thread on...
1835 * we already know there is both work available
1836 * and an idle thread with the correct affinity tag, so
1837 * fall into the code that pulls a new thread and workitem...
1838 * once we've kicked that thread off, we'll park this one
1839 */
1840 }
1841 tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
1842 TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);
1843
1844 th_to_run = tl->th_thread;
1845 TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
1846
1847 if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
1848 tl->th_flags &= ~TH_LIST_SUSPENDED;
1849 reuse_thread = 0;
1850 } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
1851 tl->th_flags &= ~TH_LIST_BLOCKED;
1852 wake_thread = 1;
1853 }
1854 tl->th_flags |= TH_LIST_RUNNING;
1855
1856 wq->wq_threads_scheduled++;
1857
1858 if (wq->wq_threads_scheduled > wq->wq_max_threads_scheduled)
1859 wq->wq_max_threads_scheduled = wq->wq_threads_scheduled;
1860
1861 pick_up_work:
1862 for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
1863 wl = (struct workitemlist *)&wq->wq_list[i];
1864
1865 if (!(TAILQ_EMPTY(&wl->wl_itemlist))) {
1866
1867 witem = TAILQ_FIRST(&wl->wl_itemlist);
1868 TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
1869 wq->wq_itemcount--;
1870
1871 item = witem->wi_item;
1872 witem->wi_item = (user_addr_t)0;
1873 TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
1874
1875 break;
1876 }
1877 }
1878 if (witem == NULL)
1879 panic("workq_run_nextitem: NULL witem");
1880
1881 if (thread != th_to_run) {
1882 /*
1883 * we're starting up a thread from a parked/suspended condition
1884 */
1885 OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
1886 OSAddAtomic(1, (SInt32 *)&tl->th_unparked);
1887 }
1888 if (wq->wq_itemcount == 0)
1889 wq->wq_flags &= ~WQ_TIMER_WATCH;
1890 else {
1891 microuptime(&tv);
1892 /*
1893 * if we had any affinity groups stall (no threads runnable)
1894 * since we last scheduled an item... and
1895 * the elapsed time since we last scheduled an item
1896 * exceeds the latency tolerance...
1897 * we ask the timer thread (which should already be running)
1898 * to add some more threads to the pool
1899 */
1900 if (wq->wq_stalled_count && !(wq->wq_flags & WQ_ADD_TO_POOL)) {
1901 timersub(&tv, &wq->wq_lastran_ts, &lat_tv);
1902
1903 if (((lat_tv.tv_sec * 1000000) + lat_tv.tv_usec) > wq_max_run_latency_usecs)
1904 wq->wq_flags |= WQ_ADD_TO_POOL;
1905
1906 KERNEL_DEBUG(0xefffd10c, wq->wq_stalled_count, lat_tv.tv_sec, lat_tv.tv_usec, wq->wq_flags, 0);
1907 }
1908 wq->wq_lastran_ts = tv;
1909 }
1910 wq->wq_stalled_count = 0;
1911 workqueue_unlock(p);
1912
1913 KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[0], wq->wq_thactivecount[1],
1914 wq->wq_thactivecount[2], wq->wq_thactivecount[3], 0);
1915
1916 KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[4], wq->wq_thactivecount[5],
1917 wq->wq_thactivecount[6], wq->wq_thactivecount[7], 0);
1918
1919 /*
1920 * if the current thread is reused for the workitem, this does not return via unix_syscall
1921 */
1922 wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
1923
1924 if (th_to_park == THREAD_NULL) {
1925
1926 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, (int)item, wq->wq_flags, 1, 0);
1927
1928 return;
1929 }
1930 workqueue_lock_spin(p);
1931
1932 parkit:
1933 wq->wq_threads_scheduled--;
1934 /*
1935 * this is a workqueue thread with no more
1936 * work to do... park it for now
1937 */
1938 uth = get_bsdthread_info(th_to_park);
1939 tl = uth->uu_threadlist;
1940 if (tl == 0)
1941 panic("wq thread with no threadlist ");
1942
1943 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1944 tl->th_flags &= ~TH_LIST_RUNNING;
1945
1946 tl->th_flags |= TH_LIST_BLOCKED;
1947 TAILQ_INSERT_HEAD(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);
1948
1949 assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));
1950
1951 workqueue_unlock(p);
1952
1953 if (start_timer)
1954 workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
1955
1956 KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, (int)current_thread(), wq->wq_threads_scheduled, 0, 0, (int)th_to_park);
1957
1958 thread_block((thread_continue_t)thread_exception_return);
1959
1960 panic("unexpected return from thread_block");
1961
1962 out:
1963 workqueue_unlock(p);
1964
1965 if (start_timer)
1966 workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
1967
1968 KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, 0, wq->wq_flags, 2, 0);
1969
1970 return;
1971 }
1972
1973 static void
1974 wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
1975 int reuse_thread, int wake_thread, int return_directly)
1976 {
1977 int ret = 0;
1978
1979 KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, (int)current_thread(), (int)item, wake_thread, tl->th_affinity_tag, (int)th);
1980
1981 ret = setup_wqthread(p, th, item, reuse_thread, tl);
1982
1983 if (ret != 0)
1984 panic("setup_wqthread failed %x\n", ret);
1985
1986 if (return_directly) {
1987 thread_exception_return();
1988
1989 panic("wq_runitem: thread_exception_return returned ...\n");
1990 }
1991 if (wake_thread) {
1992 KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);
1993
1994 wakeup(tl);
1995 } else {
1996 KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);
1997
1998 thread_resume(th);
1999 }
2000 }
2001
2002
2003 int
2004 setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
2005 {
2006
2007 #if defined(__ppc__)
2008 /*
2009 * Set up PowerPC registers...
2010 * internally they are always kept as 64 bit and
2011 * since the register set is the same between 32 and 64bit modes
2012 * we don't need 2 different methods for setting the state
2013 */
2014 {
2015 ppc_thread_state64_t state64;
2016 ppc_thread_state64_t *ts64 = &state64;
2017
2018 ts64->srr0 = (uint64_t)p->p_wqthread;
2019 ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE);
2020 ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
2021 ts64->r4 = (uint64_t)((unsigned int)tl->th_thport);
2022 ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2023 ts64->r6 = (uint64_t)item;
2024 ts64->r7 = (uint64_t)reuse_thread;
2025 ts64->r8 = (uint64_t)0;
2026
2027 thread_set_wq_state64(th, (thread_state_t)ts64);
2028 }
2029 #elif defined(__i386__)
2030 int isLP64 = 0;
2031
2032 isLP64 = IS_64BIT_PROCESS(p);
2033 /*
2034 * Set up i386 registers & function call.
2035 */
2036 if (isLP64 == 0) {
2037 x86_thread_state32_t state;
2038 x86_thread_state32_t *ts = &state;
2039
2040 ts->eip = (int)p->p_wqthread;
2041 ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
2042 ts->ebx = (unsigned int)tl->th_thport;
2043 ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2044 ts->edx = (unsigned int)item;
2045 ts->edi = (unsigned int)reuse_thread;
2046 ts->esi = (unsigned int)0;
2047 /*
2048 * set stack pointer
2049 */
2050 ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));
2051
2052 thread_set_wq_state32(th, (thread_state_t)ts);
2053
2054 } else {
2055 x86_thread_state64_t state64;
2056 x86_thread_state64_t *ts64 = &state64;
2057
2058 ts64->rip = (uint64_t)p->p_wqthread;
2059 ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
2060 ts64->rsi = (uint64_t)((unsigned int)(tl->th_thport));
2061 ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2062 ts64->rcx = (uint64_t)item;
2063 ts64->r8 = (uint64_t)reuse_thread;
2064 ts64->r9 = (uint64_t)0;
2065
2066 /*
2067 * set stack pointer aligned to 16 byte boundary
2068 */
2069 ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);
2070
2071 thread_set_wq_state64(th, (thread_state_t)ts64);
2072 }
2073 #elif defined(__arm__)
2074 arm_thread_state_t state;
2075 arm_thread_state_t *ts = &state;
2076
2077 /* XXX ARM add more */
2078 ts->pc = p->p_wqthread;
2079 ts->sp = tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE;
2080
2081 thread_set_wq_state32(th, (thread_state_t)ts);
2082 #else
2083 #error setup_wqthread not defined for this architecture
2084 #endif
2085 return(0);
2086 }
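/*
 * Editor's summary of the state set up above: on the fully implemented
 * ppc and i386/x86_64 paths, the worker resumes in user space at
 * p->p_wqthread (registered via bsdthread_register) with, in argument order,
 * the pthread area at the top of the stack mapping, the thread's Mach port,
 * the lowest usable stack address, the work item, the reuse_thread flag,
 * and 0.  The user-level entry point's name is not defined in this file.
 */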
2087