1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * @OSF_FREE_COPYRIGHT@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
31 * All Rights Reserved.
32 *
33 * Permission to use, copy, modify and distribute this software and its
34 * documentation is hereby granted, provided that both the copyright
35 * notice and this permission notice appear in all copies of the
36 * software, derivative works or modified versions, and any portions
37 * thereof, and that both notices appear in supporting documentation.
38 *
39 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
40 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
41 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
42 *
43 * Carnegie Mellon requests users of this software to return to
44 *
45 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
46 * School of Computer Science
47 * Carnegie Mellon University
48 * Pittsburgh PA 15213-3890
49 *
50 * any improvements or extensions that they make and grant Carnegie Mellon
51 * the rights to redistribute these changes.
52 */
53 /*
54 */
55 /*
56 * File: sched_prim.c
57 * Author: Avadis Tevanian, Jr.
58 * Date: 1986
59 *
60 * Scheduling primitives
61 *
62 */
63
64 #include <debug.h>
65 #include <cpus.h>
66 #include <mach_kdb.h>
67 #include <simple_clock.h>
68
69 #include <ddb/db_output.h>
70 #include <mach/machine.h>
71 #include <machine/machine_routines.h>
72 #include <machine/sched_param.h>
73 #include <kern/ast.h>
74 #include <kern/clock.h>
75 #include <kern/counters.h>
76 #include <kern/cpu_number.h>
77 #include <kern/cpu_data.h>
78 #include <kern/etap_macros.h>
79 #include <kern/lock.h>
80 #include <kern/macro_help.h>
81 #include <kern/machine.h>
82 #include <kern/misc_protos.h>
83 #include <kern/processor.h>
84 #include <kern/queue.h>
85 #include <kern/sched.h>
86 #include <kern/sched_prim.h>
87 #include <kern/syscall_subr.h>
88 #include <kern/task.h>
89 #include <kern/thread.h>
90 #include <kern/thread_swap.h>
91 #include <vm/pmap.h>
92 #include <vm/vm_kern.h>
93 #include <vm/vm_map.h>
94 #include <mach/policy.h>
95 #include <mach/sync_policy.h>
96 #include <kern/mk_sp.h> /*** ??? fix so this can be removed ***/
97 #include <sys/kdebug.h>
98
99 #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
100 int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
101
102 #define MAX_UNSAFE_QUANTA 800
103 int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
104
105 #define MAX_POLL_QUANTA 2
106 int max_poll_quanta = MAX_POLL_QUANTA;
107
108 #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
109 int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
110
111 uint32_t std_quantum_us;
112
113 uint64_t max_unsafe_computation;
114 uint32_t sched_safe_duration;
115 uint64_t max_poll_computation;
116
117 uint32_t std_quantum;
118 uint32_t min_std_quantum;
119
120 uint32_t max_rt_quantum;
121 uint32_t min_rt_quantum;
122
123 static uint32_t sched_tick_interval;
124
125 unsigned sched_tick;
126
127 #if SIMPLE_CLOCK
128 int sched_usec;
129 #endif /* SIMPLE_CLOCK */
130
131 /* Forwards */
132 void wait_queues_init(void);
133
134 static thread_t choose_thread(
135 processor_set_t pset,
136 processor_t processor);
137
138 static void do_thread_scan(void);
139
140 #if DEBUG
141 static
142 boolean_t thread_runnable(
143 thread_t thread);
144
145 #endif /*DEBUG*/
146
147
148 /*
149 * State machine
150 *
151 * states are combinations of:
152 * R running
153 * W waiting (or on wait queue)
154 * N non-interruptible
155 * O swapped out
156 * I being swapped in
157 *
158  *    init                 action
159  *           assert_wait     thread_block    clear_wait     swapout   swapin
160  *
161  *    R      RW, RWN         R; setrun       -              -
162  *    RN     RWN             RN; setrun      -              -
163  *
164  *    RW                     W               R              -
165  *    RWN                    WN              RN             -
166  *
167  *    W                                      R; setrun      WO
168  *    WN                                     RN; setrun     -
169  *
170  *    RO                                     -              -         R
171  *
172 */
173
174 /*
175 * Waiting protocols and implementation:
176 *
177 * Each thread may be waiting for exactly one event; this event
178 * is set using assert_wait(). That thread may be awakened either
179 * by performing a thread_wakeup_prim() on its event,
180 * or by directly waking that thread up with clear_wait().
181 *
182 * The implementation of wait events uses a hash table. Each
183  * bucket is a queue of threads having the same hash function
184 * value; the chain for the queue (linked list) is the run queue
185 * field. [It is not possible to be waiting and runnable at the
186 * same time.]
187 *
188 * Locks on both the thread and on the hash buckets govern the
189 * wait event field and the queue chain field. Because wakeup
190 * operations only have the event as an argument, the event hash
191 * bucket must be locked before any thread.
192 *
193 * Scheduling operations may also occur at interrupt level; therefore,
194 * interrupts below splsched() must be prevented when holding
195 * thread or hash bucket locks.
196 *
197 * The wait event hash table declarations are as follows:
198 */
199
200 #define NUMQUEUES 59
201
202 struct wait_queue wait_queues[NUMQUEUES];
203
204 #define wait_hash(event) \
205 ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
206
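/*
 * Usage sketch (illustrative only, not compiled): the waiter and the
 * waker must name the same event pointer, since both sides run it
 * through wait_hash() to pick the same bucket in wait_queues[].  The
 * object and field names below are hypothetical.
 */
#if 0
	/* waiter */
	assert_wait((event_t)&object->flag, THREAD_UNINT);
	thread_block(THREAD_CONTINUE_NULL);

	/* waker - hashes to the same wait_queues[] bucket */
	thread_wakeup((event_t)&object->flag);
#endif
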
207 void
208 sched_init(void)
209 {
210 /*
211 * Calculate the timeslicing quantum
212 * in us.
213 */
214 if (default_preemption_rate < 1)
215 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
216 std_quantum_us = (1000 * 1000) / default_preemption_rate;
217
218 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
219
220 sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
221 (1 << SCHED_TICK_SHIFT);
222
223 wait_queues_init();
224 pset_sys_bootstrap(); /* initialize processor mgmt. */
225 sched_tick = 0;
226 #if SIMPLE_CLOCK
227 sched_usec = 0;
228 #endif /* SIMPLE_CLOCK */
229 ast_init();
230 }
231
232 void
233 sched_timebase_init(void)
234 {
235 uint64_t abstime;
236
237 clock_interval_to_absolutetime_interval(
238 std_quantum_us, NSEC_PER_USEC, &abstime);
239 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
240 std_quantum = abstime;
241
242 /* 250 us */
243 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
244 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
245 min_std_quantum = abstime;
246
247 /* 50 us */
248 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
249 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
250 min_rt_quantum = abstime;
251
252 /* 50 ms */
253 clock_interval_to_absolutetime_interval(
254 50, 1000*NSEC_PER_USEC, &abstime);
255 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
256 max_rt_quantum = abstime;
257
258 clock_interval_to_absolutetime_interval(1000 >> SCHED_TICK_SHIFT,
259 USEC_PER_SEC, &abstime);
260 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
261 sched_tick_interval = abstime;
262
263 max_unsafe_computation = max_unsafe_quanta * std_quantum;
264 max_poll_computation = max_poll_quanta * std_quantum;
265 }
266
267 void
268 wait_queues_init(void)
269 {
270 register int i;
271
272 for (i = 0; i < NUMQUEUES; i++) {
273 wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
274 }
275 }
276
277 /*
278 * Thread wait timer expiration.
279 */
280 void
281 thread_timer_expire(
282 timer_call_param_t p0,
283 timer_call_param_t p1)
284 {
285 thread_t thread = p0;
286 spl_t s;
287
288 s = splsched();
289 thread_lock(thread);
290 if (--thread->wait_timer_active == 1) {
291 if (thread->wait_timer_is_set) {
292 thread->wait_timer_is_set = FALSE;
293 clear_wait_internal(thread, THREAD_TIMED_OUT);
294 }
295 }
296 thread_unlock(thread);
297 splx(s);
298 }
299
300 /*
301 * thread_set_timer:
302 *
303 * Set a timer for the current thread, if the thread
304 * is ready to wait. Must be called between assert_wait()
305 * and thread_block().
306 */
307 void
308 thread_set_timer(
309 uint32_t interval,
310 uint32_t scale_factor)
311 {
312 thread_t thread = current_thread();
313 uint64_t deadline;
314 spl_t s;
315
316 s = splsched();
317 thread_lock(thread);
318 if ((thread->state & TH_WAIT) != 0) {
319 clock_interval_to_deadline(interval, scale_factor, &deadline);
320 timer_call_enter(&thread->wait_timer, deadline);
321 assert(!thread->wait_timer_is_set);
322 thread->wait_timer_active++;
323 thread->wait_timer_is_set = TRUE;
324 }
325 thread_unlock(thread);
326 splx(s);
327 }
328
329 void
330 thread_set_timer_deadline(
331 uint64_t deadline)
332 {
333 thread_t thread = current_thread();
334 spl_t s;
335
336 s = splsched();
337 thread_lock(thread);
338 if ((thread->state & TH_WAIT) != 0) {
339 timer_call_enter(&thread->wait_timer, deadline);
340 assert(!thread->wait_timer_is_set);
341 thread->wait_timer_active++;
342 thread->wait_timer_is_set = TRUE;
343 }
344 thread_unlock(thread);
345 splx(s);
346 }
347
348 void
349 thread_cancel_timer(void)
350 {
351 thread_t thread = current_thread();
352 spl_t s;
353
354 s = splsched();
355 thread_lock(thread);
356 if (thread->wait_timer_is_set) {
357 if (timer_call_cancel(&thread->wait_timer))
358 thread->wait_timer_active--;
359 thread->wait_timer_is_set = FALSE;
360 }
361 thread_unlock(thread);
362 splx(s);
363 }
364
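/*
 * Usage sketch (illustrative only, not compiled): a bounded wait.  The
 * timer is armed after assert_wait() succeeds and before thread_block();
 * on any wakeup other than the timeout, the timer must be cancelled.
 * "my_event" is a hypothetical event.
 */
#if 0
	wait_result_t	wresult;

	wresult = assert_wait(my_event, THREAD_UNINT);
	if (wresult == THREAD_WAITING) {
		thread_set_timer(100, 1000*NSEC_PER_USEC);	/* 100 ms */
		wresult = thread_block(THREAD_CONTINUE_NULL);
		if (wresult != THREAD_TIMED_OUT)
			thread_cancel_timer();
	}
#endif
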
365 /*
366 * Set up thread timeout element when thread is created.
367 */
368 void
369 thread_timer_setup(
370 thread_t thread)
371 {
372 extern void thread_depress_expire(
373 timer_call_param_t p0,
374 timer_call_param_t p1);
375
376 timer_call_setup(&thread->wait_timer, thread_timer_expire, thread);
377 thread->wait_timer_is_set = FALSE;
378 thread->wait_timer_active = 1;
379
380 timer_call_setup(&thread->depress_timer, thread_depress_expire, thread);
381 thread->depress_timer_active = 1;
382
383 thread->ref_count++;
384 }
385
386 void
387 thread_timer_terminate(void)
388 {
389 thread_t thread = current_thread();
390 wait_result_t res;
391 spl_t s;
392
393 s = splsched();
394 thread_lock(thread);
395 if (thread->wait_timer_is_set) {
396 if (timer_call_cancel(&thread->wait_timer))
397 thread->wait_timer_active--;
398 thread->wait_timer_is_set = FALSE;
399 }
400
401 thread->wait_timer_active--;
402
403 while (thread->wait_timer_active > 0) {
404 thread_unlock(thread);
405 splx(s);
406
407 delay(1);
408
409 s = splsched();
410 thread_lock(thread);
411 }
412
413 thread->depress_timer_active--;
414
415 while (thread->depress_timer_active > 0) {
416 thread_unlock(thread);
417 splx(s);
418
419 delay(1);
420
421 s = splsched();
422 thread_lock(thread);
423 }
424
425 thread_unlock(thread);
426 splx(s);
427
428 thread_deallocate(thread);
429 }
430
431 /*
432 * Routine: thread_go_locked
433 * Purpose:
434 * Start a thread running.
435 * Conditions:
436 * thread lock held, IPC locks may be held.
437 * thread must have been pulled from wait queue under same lock hold.
438 * Returns:
439 * KERN_SUCCESS - Thread was set running
440 * KERN_NOT_WAITING - Thread was not waiting
441 */
442 kern_return_t
443 thread_go_locked(
444 thread_t thread,
445 wait_result_t wresult)
446 {
447 assert(thread->at_safe_point == FALSE);
448 assert(thread->wait_event == NO_EVENT64);
449 assert(thread->wait_queue == WAIT_QUEUE_NULL);
450
451 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
452 thread_roust_t roust_hint;
453
454 thread->state &= ~(TH_WAIT|TH_UNINT);
455 _mk_sp_thread_unblock(thread);
456
457 roust_hint = thread->roust;
458 thread->roust = NULL;
459 if ( roust_hint != NULL &&
460 (*roust_hint)(thread, wresult) ) {
461 if (thread->wait_timer_is_set) {
462 if (timer_call_cancel(&thread->wait_timer))
463 thread->wait_timer_active--;
464 thread->wait_timer_is_set = FALSE;
465 }
466
467 return (KERN_SUCCESS);
468 }
469
470 thread->wait_result = wresult;
471
472 if (!(thread->state & TH_RUN)) {
473 thread->state |= TH_RUN;
474
475 if (thread->active_callout)
476 call_thread_unblock();
477
478 pset_run_incr(thread->processor_set);
479 if (thread->sched_mode & TH_MODE_TIMESHARE)
480 pset_share_incr(thread->processor_set);
481
482 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
483 }
484
485 KERNEL_DEBUG_CONSTANT(
486 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
487 (int)thread, (int)thread->sched_pri, 0, 0, 0);
488
489 return (KERN_SUCCESS);
490 }
491
492 return (KERN_NOT_WAITING);
493 }
494
495 /*
496 * Routine: thread_mark_wait_locked
497 * Purpose:
498 * Mark a thread as waiting. If, given the circumstances,
499 * it doesn't want to wait (i.e. already aborted), then
500 * indicate that in the return value.
501 * Conditions:
502 * at splsched() and thread is locked.
503 */
504 __private_extern__
505 wait_result_t
506 thread_mark_wait_locked(
507 thread_t thread,
508 wait_interrupt_t interruptible)
509 {
510 boolean_t at_safe_point;
511
512 /*
513 * The thread may have certain types of interrupts/aborts masked
514 * off. Even if the wait location says these types of interrupts
515 * are OK, we have to honor mask settings (outer-scoped code may
516 * not be able to handle aborts at the moment).
517 */
518 if (interruptible > thread->interrupt_level)
519 interruptible = thread->interrupt_level;
520
521 at_safe_point = (interruptible == THREAD_ABORTSAFE);
522
523 if ( interruptible == THREAD_UNINT ||
524 !(thread->state & TH_ABORT) ||
525 (!at_safe_point &&
526 (thread->state & TH_ABORT_SAFELY)) ) {
527 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
528 thread->at_safe_point = at_safe_point;
529 thread->sleep_stamp = sched_tick;
530 return (thread->wait_result = THREAD_WAITING);
531 }
532 else
533 if (thread->state & TH_ABORT_SAFELY)
534 thread->state &= ~(TH_ABORT|TH_ABORT_SAFELY);
535
536 return (thread->wait_result = THREAD_INTERRUPTED);
537 }
538
539 /*
540 * Routine: thread_interrupt_level
541 * Purpose:
542 * Set the maximum interruptible state for the
543 * current thread. The effective value of any
544 * interruptible flag passed into assert_wait
545 * will never exceed this.
546 *
547 * Useful for code that must not be interrupted,
548 * but which calls code that doesn't know that.
549 * Returns:
550 * The old interrupt level for the thread.
551 */
552 __private_extern__
553 wait_interrupt_t
554 thread_interrupt_level(
555 wait_interrupt_t new_level)
556 {
557 thread_t thread = current_thread();
558 wait_interrupt_t result = thread->interrupt_level;
559
560 thread->interrupt_level = new_level;
561 return result;
562 }
563
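/*
 * Usage sketch (illustrative only, not compiled): bracketing a critical
 * region so that nested waits cannot be aborted, then restoring the
 * caller's setting.
 */
#if 0
	wait_interrupt_t	save;

	save = thread_interrupt_level(THREAD_UNINT);
	/* ... call code that may assert_wait() with THREAD_ABORTSAFE ... */
	thread_interrupt_level(save);
#endif
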
564 /*
565 * Routine: assert_wait_timeout
566 * Purpose:
567 * Assert that the thread intends to block,
568 * waiting for a timeout (no user known event).
569 */
570 unsigned int assert_wait_timeout_event;
571
572 wait_result_t
573 assert_wait_timeout(
574 mach_msg_timeout_t msecs,
575 wait_interrupt_t interruptible)
576 {
577 wait_result_t res;
578
579 res = assert_wait((event_t)&assert_wait_timeout_event, interruptible);
580 if (res == THREAD_WAITING)
581 thread_set_timer(msecs, 1000*NSEC_PER_USEC);
582 return res;
583 }
584
585 /*
586 * Check to see if an assert wait is possible, without actually doing one.
587 * This is used by debug code in locks and elsewhere to verify that it is
588 * always OK to block when trying to take a blocking lock (since waiting
589 * for the actual assert_wait to catch the case may make it hard to detect
590  * this case).
591 */
592 boolean_t
593 assert_wait_possible(void)
594 {
595
596 thread_t thread;
597 extern unsigned int debug_mode;
598
599 #if DEBUG
600 if(debug_mode) return TRUE; /* Always succeed in debug mode */
601 #endif
602
603 thread = current_thread();
604
605 return (thread == NULL || wait_queue_assert_possible(thread));
606 }
607
608 /*
609 * assert_wait:
610 *
611 * Assert that the current thread is about to go to
612 * sleep until the specified event occurs.
613 */
614 wait_result_t
615 assert_wait(
616 event_t event,
617 wait_interrupt_t interruptible)
618 {
619 register wait_queue_t wq;
620 register int index;
621
622 assert(event != NO_EVENT);
623
624 index = wait_hash(event);
625 wq = &wait_queues[index];
626 return wait_queue_assert_wait(wq, event, interruptible);
627 }
628
629 __private_extern__
630 wait_queue_t
631 wait_event_wait_queue(
632 event_t event)
633 {
634 assert(event != NO_EVENT);
635
636 return (&wait_queues[wait_hash(event)]);
637 }
638
639 wait_result_t
640 assert_wait_prim(
641 event_t event,
642 thread_roust_t roust_hint,
643 uint64_t deadline,
644 wait_interrupt_t interruptible)
645 {
646 thread_t thread = current_thread();
647 wait_result_t wresult;
648 wait_queue_t wq;
649 spl_t s;
650
651 assert(event != NO_EVENT);
652
653 wq = &wait_queues[wait_hash(event)];
654
655 s = splsched();
656 wait_queue_lock(wq);
657 thread_lock(thread);
658
659 wresult = wait_queue_assert_wait64_locked(wq, (uint32_t)event,
660 interruptible, thread);
661 if (wresult == THREAD_WAITING) {
662 if (roust_hint != NULL)
663 thread->roust = roust_hint;
664
665 if (deadline != 0) {
666 timer_call_enter(&thread->wait_timer, deadline);
667 assert(!thread->wait_timer_is_set);
668 thread->wait_timer_active++;
669 thread->wait_timer_is_set = TRUE;
670 }
671 }
672
673 thread_unlock(thread);
674 wait_queue_unlock(wq);
675 splx(s);
676
677 return (wresult);
678 }
679
680 /*
681 * thread_sleep_fast_usimple_lock:
682 *
683 * Cause the current thread to wait until the specified event
684 * occurs. The specified simple_lock is unlocked before releasing
685 * the cpu and re-acquired as part of waking up.
686 *
687 * This is the simple lock sleep interface for components that use a
688 * faster version of simple_lock() than is provided by usimple_lock().
689 */
690 __private_extern__ wait_result_t
691 thread_sleep_fast_usimple_lock(
692 event_t event,
693 simple_lock_t lock,
694 wait_interrupt_t interruptible)
695 {
696 wait_result_t res;
697
698 res = assert_wait(event, interruptible);
699 if (res == THREAD_WAITING) {
700 simple_unlock(lock);
701 res = thread_block(THREAD_CONTINUE_NULL);
702 simple_lock(lock);
703 }
704 return res;
705 }
706
707
708 /*
709 * thread_sleep_usimple_lock:
710 *
711 * Cause the current thread to wait until the specified event
712 * occurs. The specified usimple_lock is unlocked before releasing
713 * the cpu and re-acquired as part of waking up.
714 *
715 * This is the simple lock sleep interface for components where
716 * simple_lock() is defined in terms of usimple_lock().
717 */
718 wait_result_t
719 thread_sleep_usimple_lock(
720 event_t event,
721 usimple_lock_t lock,
722 wait_interrupt_t interruptible)
723 {
724 wait_result_t res;
725
726 res = assert_wait(event, interruptible);
727 if (res == THREAD_WAITING) {
728 usimple_unlock(lock);
729 res = thread_block(THREAD_CONTINUE_NULL);
730 usimple_lock(lock);
731 }
732 return res;
733 }
734
735 /*
736 * thread_sleep_mutex:
737 *
738 * Cause the current thread to wait until the specified event
739 * occurs. The specified mutex is unlocked before releasing
740 * the cpu. The mutex will be re-acquired before returning.
741 *
742 * JMM - Add hint to make sure mutex is available before rousting
743 */
744 wait_result_t
745 thread_sleep_mutex(
746 event_t event,
747 mutex_t *mutex,
748 wait_interrupt_t interruptible)
749 {
750 wait_result_t res;
751
752 res = assert_wait(event, interruptible);
753 if (res == THREAD_WAITING) {
754 mutex_unlock(mutex);
755 res = thread_block(THREAD_CONTINUE_NULL);
756 mutex_lock(mutex);
757 }
758 return res;
759 }
760
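/*
 * Usage sketch (illustrative only, not compiled): the open-coded
 * equivalent of thread_sleep_mutex(), with the conventional re-check of
 * the predicate after waking, plus the corresponding wakeup side.  The
 * "obj" structure and its fields are hypothetical.
 */
#if 0
	/* consumer */
	mutex_lock(&obj->lock);
	while (!obj->ready) {
		if (assert_wait((event_t)&obj->ready,
						THREAD_UNINT) == THREAD_WAITING) {
			mutex_unlock(&obj->lock);
			thread_block(THREAD_CONTINUE_NULL);
			mutex_lock(&obj->lock);
		}
	}
	mutex_unlock(&obj->lock);

	/* producer */
	mutex_lock(&obj->lock);
	obj->ready = TRUE;
	mutex_unlock(&obj->lock);
	thread_wakeup((event_t)&obj->ready);
#endif
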
761 /*
762 * thread_sleep_mutex_deadline:
763 *
764 * Cause the current thread to wait until the specified event
765 * (or deadline) occurs. The specified mutex is unlocked before
766 * releasing the cpu. The mutex will be re-acquired before returning.
767 *
768 * JMM - Add hint to make sure mutex is available before rousting
769 */
770 wait_result_t
771 thread_sleep_mutex_deadline(
772 event_t event,
773 mutex_t *mutex,
774 uint64_t deadline,
775 wait_interrupt_t interruptible)
776 {
777 wait_result_t res;
778
779 res = assert_wait(event, interruptible);
780 if (res == THREAD_WAITING) {
781 mutex_unlock(mutex);
782 thread_set_timer_deadline(deadline);
783 res = thread_block(THREAD_CONTINUE_NULL);
784 if (res != THREAD_TIMED_OUT)
785 thread_cancel_timer();
786 mutex_lock(mutex);
787 }
788 return res;
789 }
790
791 /*
792 * thread_sleep_lock_write:
793 *
794 * Cause the current thread to wait until the specified event
795 * occurs. The specified (write) lock is unlocked before releasing
796 * the cpu. The (write) lock will be re-acquired before returning.
797 *
798 * JMM - Add hint to make sure mutex is available before rousting
799 */
800 wait_result_t
801 thread_sleep_lock_write(
802 event_t event,
803 lock_t *lock,
804 wait_interrupt_t interruptible)
805 {
806 wait_result_t res;
807
808 res = assert_wait(event, interruptible);
809 if (res == THREAD_WAITING) {
810 lock_write_done(lock);
811 res = thread_block(THREAD_CONTINUE_NULL);
812 lock_write(lock);
813 }
814 return res;
815 }
816
817
818 /*
819 * thread_sleep_funnel:
820 *
821 * Cause the current thread to wait until the specified event
822 * occurs. If the thread is funnelled, the funnel will be released
823 * before giving up the cpu. The funnel will be re-acquired before returning.
824 *
825 * JMM - Right now the funnel is dropped and re-acquired inside
826 * thread_block(). At some point, this may give thread_block() a hint.
827 */
828 wait_result_t
829 thread_sleep_funnel(
830 event_t event,
831 wait_interrupt_t interruptible)
832 {
833 wait_result_t res;
834
835 res = assert_wait(event, interruptible);
836 if (res == THREAD_WAITING) {
837 res = thread_block(THREAD_CONTINUE_NULL);
838 }
839 return res;
840 }
841
842 /*
843 * thread_[un]stop(thread)
844  *	Once a thread has blocked interruptibly (via assert_wait), prevent
845  *	it from running until thread_unstop() is called.
846 *
847 * If someone else has already stopped the thread, wait for the
848 * stop to be cleared, and then stop it again.
849 *
850 * Return FALSE if interrupted.
851 *
852 * NOTE: thread_hold/thread_suspend should be called on the activation
853 * before calling thread_stop. TH_SUSP is only recognized when
854 * a thread blocks and only prevents clear_wait/thread_wakeup
855 * from restarting an interruptible wait. The wake_active flag is
856 * used to indicate that someone is waiting on the thread.
857 */
858 boolean_t
859 thread_stop(
860 thread_t thread)
861 {
862 spl_t s = splsched();
863
864 wake_lock(thread);
865
866 while (thread->state & TH_SUSP) {
867 wait_result_t result;
868
869 thread->wake_active = TRUE;
870 result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
871 wake_unlock(thread);
872 splx(s);
873
874 if (result == THREAD_WAITING)
875 result = thread_block(THREAD_CONTINUE_NULL);
876
877 if (result != THREAD_AWAKENED)
878 return (FALSE);
879
880 s = splsched();
881 wake_lock(thread);
882 }
883
884 thread_lock(thread);
885 thread->state |= TH_SUSP;
886
887 while (thread->state & TH_RUN) {
888 wait_result_t result;
889 processor_t processor = thread->last_processor;
890
891 if ( processor != PROCESSOR_NULL &&
892 processor->state == PROCESSOR_RUNNING &&
893 processor->active_thread == thread )
894 cause_ast_check(processor);
895 thread_unlock(thread);
896
897 thread->wake_active = TRUE;
898 result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
899 wake_unlock(thread);
900 splx(s);
901
902 if (result == THREAD_WAITING)
903 result = thread_block(THREAD_CONTINUE_NULL);
904
905 if (result != THREAD_AWAKENED) {
906 thread_unstop(thread);
907 return (FALSE);
908 }
909
910 s = splsched();
911 wake_lock(thread);
912 thread_lock(thread);
913 }
914
915 thread_unlock(thread);
916 wake_unlock(thread);
917 splx(s);
918
919 return (TRUE);
920 }
921
922 /*
923  * Clear TH_SUSP and, if the thread has been stopped and is now runnable,
924 * put it back on the run queue.
925 */
926 void
927 thread_unstop(
928 thread_t thread)
929 {
930 spl_t s = splsched();
931
932 wake_lock(thread);
933 thread_lock(thread);
934
935 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
936 thread->state &= ~TH_SUSP;
937 thread->state |= TH_RUN;
938
939 _mk_sp_thread_unblock(thread);
940
941 pset_run_incr(thread->processor_set);
942 if (thread->sched_mode & TH_MODE_TIMESHARE)
943 pset_share_incr(thread->processor_set);
944
945 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
946
947 KERNEL_DEBUG_CONSTANT(
948 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
949 (int)thread, (int)thread->sched_pri, 0, 0, 0);
950 }
951 else
952 if (thread->state & TH_SUSP) {
953 thread->state &= ~TH_SUSP;
954
955 if (thread->wake_active) {
956 thread->wake_active = FALSE;
957 thread_unlock(thread);
958 wake_unlock(thread);
959 splx(s);
960
961 thread_wakeup(&thread->wake_active);
962 return;
963 }
964 }
965
966 thread_unlock(thread);
967 wake_unlock(thread);
968 splx(s);
969 }
970
971 /*
972 * Wait for the thread's RUN bit to clear
973 */
974 boolean_t
975 thread_wait(
976 thread_t thread)
977 {
978 spl_t s = splsched();
979
980 wake_lock(thread);
981 thread_lock(thread);
982
983 while (thread->state & TH_RUN) {
984 wait_result_t result;
985 processor_t processor = thread->last_processor;
986
987 if ( processor != PROCESSOR_NULL &&
988 processor->state == PROCESSOR_RUNNING &&
989 processor->active_thread == thread )
990 cause_ast_check(processor);
991 thread_unlock(thread);
992
993 thread->wake_active = TRUE;
994 result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
995 wake_unlock(thread);
996 splx(s);
997
998 if (result == THREAD_WAITING)
999 result = thread_block(THREAD_CONTINUE_NULL);
1000
1001 if (result != THREAD_AWAKENED)
1002 return (FALSE);
1003
1004 s = splsched();
1005 wake_lock(thread);
1006 thread_lock(thread);
1007 }
1008
1009 thread_unlock(thread);
1010 wake_unlock(thread);
1011 splx(s);
1012
1013 return (TRUE);
1014 }
1015
1016 /*
1017 * Routine: clear_wait_internal
1018 *
1019 * Clear the wait condition for the specified thread.
1020 * Start the thread executing if that is appropriate.
1021 * Arguments:
1022 * thread thread to awaken
1023 * result Wakeup result the thread should see
1024 * Conditions:
1025 * At splsched
1026 * the thread is locked.
1027 * Returns:
1028  *		KERN_SUCCESS		thread was rousted out of a wait
1029 * KERN_FAILURE thread was waiting but could not be rousted
1030 * KERN_NOT_WAITING thread was not waiting
1031 */
1032 __private_extern__ kern_return_t
1033 clear_wait_internal(
1034 thread_t thread,
1035 wait_result_t wresult)
1036 {
1037 wait_queue_t wq = thread->wait_queue;
1038 int i = LockTimeOut;
1039
1040 do {
1041 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1042 return (KERN_FAILURE);
1043
1044 if (wq != WAIT_QUEUE_NULL) {
1045 if (wait_queue_lock_try(wq)) {
1046 wait_queue_pull_thread_locked(wq, thread, TRUE);
1047 /* wait queue unlocked, thread still locked */
1048 }
1049 else {
1050 thread_unlock(thread);
1051 delay(1);
1052
1053 thread_lock(thread);
1054 if (wq != thread->wait_queue)
1055 return (KERN_NOT_WAITING);
1056
1057 continue;
1058 }
1059 }
1060
1061 return (thread_go_locked(thread, wresult));
1062 } while (--i > 0);
1063
1064 panic("clear_wait_internal: deadlock: thread=0x%x, wq=0x%x, cpu=%d\n",
1065 thread, wq, cpu_number());
1066
1067 return (KERN_FAILURE);
1068 }
1069
1070
1071 /*
1072 * clear_wait:
1073 *
1074 * Clear the wait condition for the specified thread. Start the thread
1075 * executing if that is appropriate.
1076 *
1077 * parameters:
1078 * thread thread to awaken
1079 * result Wakeup result the thread should see
1080 */
1081 kern_return_t
1082 clear_wait(
1083 thread_t thread,
1084 wait_result_t result)
1085 {
1086 kern_return_t ret;
1087 spl_t s;
1088
1089 s = splsched();
1090 thread_lock(thread);
1091 ret = clear_wait_internal(thread, result);
1092 thread_unlock(thread);
1093 splx(s);
1094 return ret;
1095 }
1096
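/*
 * Usage sketch (illustrative only, not compiled): forcing a specific
 * thread out of an interruptible wait.  The target observes
 * THREAD_INTERRUPTED as its thread_block() result; a wait marked
 * TH_UNINT cannot be interrupted this way.  "target" is a hypothetical
 * thread reference.
 */
#if 0
	if (clear_wait(target, THREAD_INTERRUPTED) == KERN_NOT_WAITING) {
		/* target was not blocked; nothing to do */
	}
#endif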
1097
1098 /*
1099 * thread_wakeup_prim:
1100 *
1101 * Common routine for thread_wakeup, thread_wakeup_with_result,
1102 * and thread_wakeup_one.
1103 *
1104 */
1105 kern_return_t
1106 thread_wakeup_prim(
1107 event_t event,
1108 boolean_t one_thread,
1109 wait_result_t result)
1110 {
1111 register wait_queue_t wq;
1112 register int index;
1113
1114 index = wait_hash(event);
1115 wq = &wait_queues[index];
1116 if (one_thread)
1117 return (wait_queue_wakeup_one(wq, event, result));
1118 else
1119 return (wait_queue_wakeup_all(wq, event, result));
1120 }
1121
1122 /*
1123 * thread_bind:
1124 *
1125 * Force a thread to execute on the specified processor.
1126 *
1127 * Returns the previous binding. PROCESSOR_NULL means
1128 * not bound.
1129 *
1130 * XXX - DO NOT export this to users - XXX
1131 */
1132 processor_t
1133 thread_bind(
1134 register thread_t thread,
1135 processor_t processor)
1136 {
1137 processor_t prev;
1138 run_queue_t runq = RUN_QUEUE_NULL;
1139 spl_t s;
1140
1141 s = splsched();
1142 thread_lock(thread);
1143 prev = thread->bound_processor;
1144 if (prev != PROCESSOR_NULL)
1145 runq = run_queue_remove(thread);
1146
1147 thread->bound_processor = processor;
1148
1149 if (runq != RUN_QUEUE_NULL)
1150 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1151 thread_unlock(thread);
1152 splx(s);
1153
1154 return (prev);
1155 }
1156
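/*
 * Usage sketch (illustrative only, not compiled): temporarily binding
 * the current thread to the master processor, then restoring the prior
 * binding.  The binding takes effect at the next context switch, hence
 * the explicit thread_block().
 */
#if 0
	processor_t	prev;

	prev = thread_bind(current_thread(), master_processor);
	thread_block(THREAD_CONTINUE_NULL);
	/* ... now running on master_processor ... */
	thread_bind(current_thread(), prev);
#endif
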
1157 struct {
1158 uint32_t idle_pset_last,
1159 idle_pset_any,
1160 idle_bound;
1161
1162 uint32_t pset_self,
1163 pset_last,
1164 pset_other,
1165 bound_self,
1166 bound_other;
1167
1168 uint32_t realtime_self,
1169 realtime_last,
1170 realtime_other;
1171
1172 uint32_t missed_realtime,
1173 missed_other;
1174 } dispatch_counts;
1175
1176 /*
1177 * Select a thread for the current processor to run.
1178 *
1179 * May select the current thread, which must be locked.
1180 */
1181 thread_t
1182 thread_select(
1183 register processor_t processor)
1184 {
1185 register thread_t thread;
1186 processor_set_t pset;
1187 boolean_t other_runnable;
1188
1189 /*
1190 * Check for other non-idle runnable threads.
1191 */
1192 pset = processor->processor_set;
1193 thread = processor->active_thread;
1194
1195 /* Update the thread's priority */
1196 if (thread->sched_stamp != sched_tick)
1197 update_priority(thread);
1198
1199 processor->current_pri = thread->sched_pri;
1200
1201 simple_lock(&pset->sched_lock);
1202
1203 other_runnable = processor->runq.count > 0 || pset->runq.count > 0;
1204
1205 if ( thread->state == TH_RUN &&
1206 thread->processor_set == pset &&
1207 (thread->bound_processor == PROCESSOR_NULL ||
1208 thread->bound_processor == processor) ) {
1209 if ( thread->sched_pri >= BASEPRI_RTQUEUES &&
1210 first_timeslice(processor) ) {
1211 if (pset->runq.highq >= BASEPRI_RTQUEUES) {
1212 register run_queue_t runq = &pset->runq;
1213 register queue_t q;
1214
1215 q = runq->queues + runq->highq;
1216 if (((thread_t)q->next)->realtime.deadline <
1217 processor->deadline) {
1218 thread = (thread_t)q->next;
1219 ((queue_entry_t)thread)->next->prev = q;
1220 q->next = ((queue_entry_t)thread)->next;
1221 thread->runq = RUN_QUEUE_NULL;
1222 assert(thread->sched_mode & TH_MODE_PREEMPT);
1223 runq->count--; runq->urgency--;
1224 if (queue_empty(q)) {
1225 if (runq->highq != IDLEPRI)
1226 clrbit(MAXPRI - runq->highq, runq->bitmap);
1227 runq->highq = MAXPRI - ffsbit(runq->bitmap);
1228 }
1229 }
1230 }
1231
1232 processor->deadline = thread->realtime.deadline;
1233
1234 simple_unlock(&pset->sched_lock);
1235
1236 return (thread);
1237 }
1238
1239 if ( (!other_runnable ||
1240 (processor->runq.highq < thread->sched_pri &&
1241 pset->runq.highq < thread->sched_pri)) ) {
1242
1243 /* I am the highest priority runnable (non-idle) thread */
1244
1245 processor->deadline = UINT64_MAX;
1246
1247 simple_unlock(&pset->sched_lock);
1248
1249 return (thread);
1250 }
1251 }
1252
1253 if (other_runnable)
1254 thread = choose_thread(pset, processor);
1255 else {
1256 /*
1257 * Nothing is runnable, so set this processor idle if it
1258 * was running. Return its idle thread.
1259 */
1260 if (processor->state == PROCESSOR_RUNNING) {
1261 remqueue(&pset->active_queue, (queue_entry_t)processor);
1262 processor->state = PROCESSOR_IDLE;
1263
1264 enqueue_tail(&pset->idle_queue, (queue_entry_t)processor);
1265 pset->idle_count++;
1266 }
1267
1268 processor->deadline = UINT64_MAX;
1269
1270 thread = processor->idle_thread;
1271 }
1272
1273 simple_unlock(&pset->sched_lock);
1274
1275 return (thread);
1276 }
1277
1278 /*
1279 * Perform a context switch and start executing the new thread.
1280 *
1281 * If continuation is non-zero, resume the old (current) thread
1282 * next by executing at continuation on a new stack, in lieu
1283 * of returning.
1284 *
1285 * Returns TRUE if the hand-off succeeds.
1286 *
1287 * Called at splsched.
1288 */
1289
1290 #define funnel_release_check(thread, debug) \
1291 MACRO_BEGIN \
1292 if ((thread)->funnel_state & TH_FN_OWNED) { \
1293 (thread)->funnel_state = TH_FN_REFUNNEL; \
1294 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
1295 (thread)->funnel_lock, (debug), 0, 0, 0); \
1296 funnel_unlock((thread)->funnel_lock); \
1297 } \
1298 MACRO_END
1299
1300 #define funnel_refunnel_check(thread, debug) \
1301 MACRO_BEGIN \
1302 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
1303 kern_return_t result = (thread)->wait_result; \
1304 \
1305 (thread)->funnel_state = 0; \
1306 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
1307 (thread)->funnel_lock, (debug), 0, 0, 0); \
1308 funnel_lock((thread)->funnel_lock); \
1309 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
1310 (thread)->funnel_lock, (debug), 0, 0, 0); \
1311 (thread)->funnel_state = TH_FN_OWNED; \
1312 (thread)->wait_result = result; \
1313 } \
1314 MACRO_END
1315
1316 static thread_t
1317 __current_thread(void)
1318 {
1319 return (current_thread());
1320 }
1321
1322 boolean_t
1323 thread_invoke(
1324 register thread_t old_thread,
1325 register thread_t new_thread,
1326 int reason,
1327 thread_continue_t old_cont)
1328 {
1329 thread_continue_t new_cont;
1330 processor_t processor;
1331
1332 if (get_preemption_level() != 0)
1333 panic("thread_invoke: preemption_level %d\n",
1334 get_preemption_level());
1335
1336 /*
1337 * Mark thread interruptible.
1338 */
1339 thread_lock(new_thread);
1340 new_thread->state &= ~TH_UNINT;
1341
1342 assert(thread_runnable(new_thread));
1343
1344 assert(old_thread->continuation == NULL);
1345
1346 /*
1347 * Allow time constraint threads to hang onto
1348 * a stack.
1349 */
1350 if ( (old_thread->sched_mode & TH_MODE_REALTIME) &&
1351 !old_thread->reserved_stack ) {
1352 old_thread->reserved_stack = old_thread->kernel_stack;
1353 }
1354
1355 if (old_cont != NULL) {
1356 if (new_thread->state & TH_STACK_HANDOFF) {
1357 /*
1358 * If the old thread is using a privileged stack,
1359 * check to see whether we can exchange it with
1360 * that of the new thread.
1361 */
1362 if ( old_thread->kernel_stack == old_thread->reserved_stack &&
1363 !new_thread->reserved_stack)
1364 goto need_stack;
1365
1366 new_thread->state &= ~TH_STACK_HANDOFF;
1367 new_cont = new_thread->continuation;
1368 new_thread->continuation = NULL;
1369
1370 /*
1371 * Set up ast context of new thread and switch
1372 * to its timer.
1373 */
1374 processor = current_processor();
1375 processor->active_thread = new_thread;
1376 processor->current_pri = new_thread->sched_pri;
1377 new_thread->last_processor = processor;
1378 ast_context(new_thread->top_act, processor->slot_num);
1379 timer_switch(&new_thread->system_timer);
1380 thread_unlock(new_thread);
1381
1382 current_task()->csw++;
1383
1384 old_thread->reason = reason;
1385 old_thread->continuation = old_cont;
1386
1387 _mk_sp_thread_done(old_thread, new_thread, processor);
1388
1389 machine_stack_handoff(old_thread, new_thread);
1390
1391 _mk_sp_thread_begin(new_thread, processor);
1392
1393 wake_lock(old_thread);
1394 thread_lock(old_thread);
1395
1396 /*
1397 * Inline thread_dispatch but
1398 * don't free stack.
1399 */
1400
1401 switch (old_thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_IDLE)) {
1402
1403 case TH_RUN | TH_UNINT:
1404 case TH_RUN:
1405 /*
1406 * Still running, put back
1407 * onto a run queue.
1408 */
1409 old_thread->state |= TH_STACK_HANDOFF;
1410 _mk_sp_thread_dispatch(old_thread);
1411
1412 thread_unlock(old_thread);
1413 wake_unlock(old_thread);
1414 break;
1415
1416 case TH_RUN | TH_WAIT | TH_UNINT:
1417 case TH_RUN | TH_WAIT:
1418 {
1419 boolean_t term, wake, callout;
1420
1421 /*
1422 * Waiting.
1423 */
1424 old_thread->sleep_stamp = sched_tick;
1425 old_thread->state |= TH_STACK_HANDOFF;
1426 old_thread->state &= ~TH_RUN;
1427
1428 term = (old_thread->state & TH_TERMINATE)? TRUE: FALSE;
1429 callout = old_thread->active_callout;
1430 wake = old_thread->wake_active;
1431 old_thread->wake_active = FALSE;
1432
1433 if (old_thread->sched_mode & TH_MODE_TIMESHARE)
1434 pset_share_decr(old_thread->processor_set);
1435 pset_run_decr(old_thread->processor_set);
1436
1437 thread_unlock(old_thread);
1438 wake_unlock(old_thread);
1439
1440 if (callout)
1441 call_thread_block();
1442
1443 if (wake)
1444 thread_wakeup((event_t)&old_thread->wake_active);
1445
1446 if (term)
1447 thread_reaper_enqueue(old_thread);
1448 break;
1449 }
1450
1451 case TH_RUN | TH_IDLE:
1452 /*
1453 * The idle threads don't go
1454 * onto a run queue.
1455 */
1456 old_thread->state |= TH_STACK_HANDOFF;
1457 thread_unlock(old_thread);
1458 wake_unlock(old_thread);
1459 break;
1460
1461 default:
1462 panic("thread_invoke: state 0x%x\n", old_thread->state);
1463 }
1464
1465 counter_always(c_thread_invoke_hits++);
1466
1467 funnel_refunnel_check(new_thread, 2);
1468 (void) spllo();
1469
1470 assert(new_cont);
1471 call_continuation(new_cont);
1472 /*NOTREACHED*/
1473 return (TRUE);
1474 }
1475 else
1476 if (new_thread->state & TH_STACK_ALLOC) {
1477 /*
1478 * Waiting for a stack
1479 */
1480 counter_always(c_thread_invoke_misses++);
1481 thread_unlock(new_thread);
1482 return (FALSE);
1483 }
1484 else
1485 if (new_thread == old_thread) {
1486 /* same thread but with continuation */
1487 counter(++c_thread_invoke_same);
1488 thread_unlock(new_thread);
1489
1490 funnel_refunnel_check(new_thread, 3);
1491 (void) spllo();
1492
1493 call_continuation(old_cont);
1494 /*NOTREACHED*/
1495 }
1496 }
1497 else {
1498 /*
1499 * Check that the new thread has a stack
1500 */
1501 if (new_thread->state & TH_STACK_HANDOFF) {
1502 need_stack:
1503 if (!stack_alloc_try(new_thread, thread_continue)) {
1504 counter_always(c_thread_invoke_misses++);
1505 thread_swapin(new_thread);
1506 return (FALSE);
1507 }
1508
1509 new_thread->state &= ~TH_STACK_HANDOFF;
1510 }
1511 else
1512 if (new_thread->state & TH_STACK_ALLOC) {
1513 /*
1514 * Waiting for a stack
1515 */
1516 counter_always(c_thread_invoke_misses++);
1517 thread_unlock(new_thread);
1518 return (FALSE);
1519 }
1520 else
1521 if (old_thread == new_thread) {
1522 counter(++c_thread_invoke_same);
1523 thread_unlock(new_thread);
1524 return (TRUE);
1525 }
1526 }
1527
1528 /*
1529 * Set up ast context of new thread and switch to its timer.
1530 */
1531 processor = current_processor();
1532 processor->active_thread = new_thread;
1533 processor->current_pri = new_thread->sched_pri;
1534 new_thread->last_processor = processor;
1535 ast_context(new_thread->top_act, processor->slot_num);
1536 timer_switch(&new_thread->system_timer);
1537 assert(thread_runnable(new_thread));
1538 thread_unlock(new_thread);
1539
1540 counter_always(c_thread_invoke_csw++);
1541 current_task()->csw++;
1542
1543 assert(old_thread->runq == RUN_QUEUE_NULL);
1544 old_thread->reason = reason;
1545 old_thread->continuation = old_cont;
1546
1547 _mk_sp_thread_done(old_thread, new_thread, processor);
1548
1549 /*
1550 * Here is where we actually change register context,
1551 * and address space if required. Note that control
1552 * will not return here immediately.
1553 */
1554 old_thread = machine_switch_context(old_thread, old_cont, new_thread);
1555
1556 /* Now on new thread's stack. Set a local variable to refer to it. */
1557 new_thread = __current_thread();
1558 assert(old_thread != new_thread);
1559
1560 assert(thread_runnable(new_thread));
1561 _mk_sp_thread_begin(new_thread, new_thread->last_processor);
1562
1563 /*
1564 * We're back. Now old_thread is the thread that resumed
1565 * us, and we have to dispatch it.
1566 */
1567 thread_dispatch(old_thread);
1568
1569 if (old_cont) {
1570 funnel_refunnel_check(new_thread, 3);
1571 (void) spllo();
1572
1573 call_continuation(old_cont);
1574 /*NOTREACHED*/
1575 }
1576
1577 return (TRUE);
1578 }
1579
1580 /*
1581 * thread_continue:
1582 *
1583 * Called at splsched when a thread first receives
1584 * a new stack after a continuation.
1585 */
1586 void
1587 thread_continue(
1588 register thread_t old_thread)
1589 {
1590 register thread_t self = current_thread();
1591 register thread_continue_t continuation;
1592
1593 continuation = self->continuation;
1594 self->continuation = NULL;
1595
1596 _mk_sp_thread_begin(self, self->last_processor);
1597
1598 /*
1599 * We must dispatch the old thread and then
1600 * call the current thread's continuation.
1601 * There might not be an old thread, if we are
1602 * the first thread to run on this processor.
1603 */
1604 if (old_thread != THREAD_NULL)
1605 thread_dispatch(old_thread);
1606
1607 funnel_refunnel_check(self, 4);
1608 (void)spllo();
1609
1610 call_continuation(continuation);
1611 /*NOTREACHED*/
1612 }
1613
1614 /*
1615 * thread_block_reason:
1616 *
1617 * Forces a reschedule, blocking the caller if a wait
1618 * has been asserted.
1619 *
1620 * If a continuation is specified, then thread_invoke will
1621 * attempt to discard the thread's kernel stack. When the
1622 * thread resumes, it will execute the continuation function
1623 * on a new kernel stack.
1624 */
1625 counter(mach_counter_t c_thread_block_calls = 0;)
1626
1627 int
1628 thread_block_reason(
1629 thread_continue_t continuation,
1630 ast_t reason)
1631 {
1632 register thread_t thread = current_thread();
1633 register processor_t processor;
1634 register thread_t new_thread;
1635 spl_t s;
1636
1637 counter(++c_thread_block_calls);
1638
1639 check_simple_locks();
1640
1641 s = splsched();
1642
1643 if (!(reason & AST_PREEMPT))
1644 funnel_release_check(thread, 2);
1645
1646 processor = current_processor();
1647
1648 /* If we're explicitly yielding, force a subsequent quantum */
1649 if (reason & AST_YIELD)
1650 processor->timeslice = 0;
1651
1652 /* We're handling all scheduling AST's */
1653 ast_off(AST_SCHEDULING);
1654
1655 thread_lock(thread);
1656 new_thread = thread_select(processor);
1657 assert(new_thread && thread_runnable(new_thread));
1658 thread_unlock(thread);
1659 while (!thread_invoke(thread, new_thread, reason, continuation)) {
1660 thread_lock(thread);
1661 new_thread = thread_select(processor);
1662 assert(new_thread && thread_runnable(new_thread));
1663 thread_unlock(thread);
1664 }
1665
1666 funnel_refunnel_check(thread, 5);
1667 splx(s);
1668
1669 return (thread->wait_result);
1670 }
1671
1672 /*
1673 * thread_block:
1674 *
1675 * Block the current thread if a wait has been asserted.
1676 */
1677 int
1678 thread_block(
1679 thread_continue_t continuation)
1680 {
1681 return thread_block_reason(continuation, AST_NONE);
1682 }
1683
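/*
 * Usage sketch (illustrative only, not compiled): blocking with a
 * continuation so the kernel stack can be discarded while waiting.
 * When the thread is resumed it starts over in the continuation on a
 * fresh stack, so no locals survive across the block.  "my_continue"
 * and "my_event" are hypothetical.
 */
#if 0
	static void
	my_continue(void)
	{
		/* resumed here on a new stack; must not return */
	}

	static void
	my_wait(void)
	{
		assert_wait(my_event, THREAD_UNINT);
		thread_block(my_continue);
		/*NOTREACHED*/
	}
#endif
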
1684 /*
1685 * thread_run:
1686 *
1687 * Switch directly from the current (old) thread to the
1688 * new thread, handing off our quantum if appropriate.
1689 *
1690 * New thread must be runnable, and not on a run queue.
1691 *
1692 * Called at splsched.
1693 */
1694 int
1695 thread_run(
1696 thread_t old_thread,
1697 thread_continue_t continuation,
1698 thread_t new_thread)
1699 {
1700 ast_t handoff = AST_HANDOFF;
1701
1702 assert(old_thread == current_thread());
1703
1704 funnel_release_check(old_thread, 3);
1705
1706 while (!thread_invoke(old_thread, new_thread, handoff, continuation)) {
1707 register processor_t processor = current_processor();
1708
1709 thread_lock(old_thread);
1710 new_thread = thread_select(processor);
1711 thread_unlock(old_thread);
1712 handoff = AST_NONE;
1713 }
1714
1715 funnel_refunnel_check(old_thread, 6);
1716
1717 return (old_thread->wait_result);
1718 }
1719
1720 /*
1721 * Dispatches a running thread that is not on a
1722 * run queue.
1723 *
1724 * Called at splsched.
1725 */
1726 void
1727 thread_dispatch(
1728 register thread_t thread)
1729 {
1730 wake_lock(thread);
1731 thread_lock(thread);
1732
1733 /*
1734 * If we are discarding the thread's stack, we must do it
1735 * before the thread has a chance to run.
1736 */
1737 #ifndef i386
1738 if (thread->continuation != NULL) {
1739 assert((thread->state & TH_STACK_STATE) == 0);
1740 thread->state |= TH_STACK_HANDOFF;
1741 stack_free(thread);
1742 }
1743 #endif
1744
1745 switch (thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_IDLE)) {
1746
1747 case TH_RUN | TH_UNINT:
1748 case TH_RUN:
1749 /*
1750 * No reason to stop. Put back on a run queue.
1751 */
1752 _mk_sp_thread_dispatch(thread);
1753 break;
1754
1755 case TH_RUN | TH_WAIT | TH_UNINT:
1756 case TH_RUN | TH_WAIT:
1757 {
1758 boolean_t term, wake, callout;
1759
1760 /*
1761 * Waiting
1762 */
1763 thread->sleep_stamp = sched_tick;
1764 thread->state &= ~TH_RUN;
1765
1766 term = (thread->state & TH_TERMINATE)? TRUE: FALSE;
1767 callout = thread->active_callout;
1768 wake = thread->wake_active;
1769 thread->wake_active = FALSE;
1770
1771 if (thread->sched_mode & TH_MODE_TIMESHARE)
1772 pset_share_decr(thread->processor_set);
1773 pset_run_decr(thread->processor_set);
1774
1775 thread_unlock(thread);
1776 wake_unlock(thread);
1777
1778 if (callout)
1779 call_thread_block();
1780
1781 if (wake)
1782 thread_wakeup((event_t)&thread->wake_active);
1783
1784 if (term)
1785 thread_reaper_enqueue(thread);
1786
1787 return;
1788 }
1789
1790 case TH_RUN | TH_IDLE:
1791 /*
1792 * The idle threads don't go
1793 * onto a run queue.
1794 */
1795 break;
1796
1797 default:
1798 panic("thread_dispatch: state 0x%x\n", thread->state);
1799 }
1800
1801 thread_unlock(thread);
1802 wake_unlock(thread);
1803 }
1804
1805 /*
1806 * Enqueue thread on run queue. Thread must be locked,
1807 * and not already be on a run queue. Returns TRUE
1808 * if a preemption is indicated based on the state
1809 * of the run queue.
1810 *
1811  *	Run queue must be locked; see run_queue_remove()
1812  *	for more info.
1813 */
1814 static boolean_t
1815 run_queue_enqueue(
1816 register run_queue_t rq,
1817 register thread_t thread,
1818 integer_t options)
1819 {
1820 register int whichq = thread->sched_pri;
1821 register queue_t queue = &rq->queues[whichq];
1822 boolean_t result = FALSE;
1823
1824 assert(whichq >= MINPRI && whichq <= MAXPRI);
1825
1826 assert(thread->runq == RUN_QUEUE_NULL);
1827 if (queue_empty(queue)) {
1828 enqueue_tail(queue, (queue_entry_t)thread);
1829
1830 setbit(MAXPRI - whichq, rq->bitmap);
1831 if (whichq > rq->highq) {
1832 rq->highq = whichq;
1833 result = TRUE;
1834 }
1835 }
1836 else
1837 if (options & SCHED_HEADQ)
1838 enqueue_head(queue, (queue_entry_t)thread);
1839 else
1840 enqueue_tail(queue, (queue_entry_t)thread);
1841
1842 thread->runq = rq;
1843 if (thread->sched_mode & TH_MODE_PREEMPT)
1844 rq->urgency++;
1845 rq->count++;
1846
1847 return (result);
1848 }
1849
1850 /*
1851 * Enqueue a thread for realtime execution, similar
1852 * to above. Handles preemption directly.
1853 */
1854 static void
1855 realtime_schedule_insert(
1856 register processor_set_t pset,
1857 register thread_t thread)
1858 {
1859 register run_queue_t rq = &pset->runq;
1860 register int whichq = thread->sched_pri;
1861 register queue_t queue = &rq->queues[whichq];
1862 uint64_t deadline = thread->realtime.deadline;
1863 boolean_t try_preempt = FALSE;
1864
1865 assert(whichq >= BASEPRI_REALTIME && whichq <= MAXPRI);
1866
1867 assert(thread->runq == RUN_QUEUE_NULL);
1868 if (queue_empty(queue)) {
1869 enqueue_tail(queue, (queue_entry_t)thread);
1870
1871 setbit(MAXPRI - whichq, rq->bitmap);
1872 if (whichq > rq->highq)
1873 rq->highq = whichq;
1874 try_preempt = TRUE;
1875 }
1876 else {
1877 register thread_t entry = (thread_t)queue_first(queue);
1878
1879 while (TRUE) {
1880 if ( queue_end(queue, (queue_entry_t)entry) ||
1881 deadline < entry->realtime.deadline ) {
1882 entry = (thread_t)queue_prev((queue_entry_t)entry);
1883 break;
1884 }
1885
1886 entry = (thread_t)queue_next((queue_entry_t)entry);
1887 }
1888
1889 if ((queue_entry_t)entry == queue)
1890 try_preempt = TRUE;
1891
1892 insque((queue_entry_t)thread, (queue_entry_t)entry);
1893 }
1894
1895 thread->runq = rq;
1896 assert(thread->sched_mode & TH_MODE_PREEMPT);
1897 rq->count++; rq->urgency++;
1898
1899 if (try_preempt) {
1900 register processor_t processor;
1901
1902 processor = current_processor();
1903 if ( pset == processor->processor_set &&
1904 (thread->sched_pri > processor->current_pri ||
1905 deadline < processor->deadline ) ) {
1906 dispatch_counts.realtime_self++;
1907 simple_unlock(&pset->sched_lock);
1908
1909 ast_on(AST_PREEMPT | AST_URGENT);
1910 return;
1911 }
1912
1913 if ( pset->processor_count > 1 ||
1914 pset != processor->processor_set ) {
1915 processor_t myprocessor, lastprocessor;
1916 queue_entry_t next;
1917
1918 myprocessor = processor;
1919 processor = thread->last_processor;
1920 if ( processor != myprocessor &&
1921 processor != PROCESSOR_NULL &&
1922 processor->processor_set == pset &&
1923 processor->state == PROCESSOR_RUNNING &&
1924 (thread->sched_pri > processor->current_pri ||
1925 deadline < processor->deadline ) ) {
1926 dispatch_counts.realtime_last++;
1927 cause_ast_check(processor);
1928 simple_unlock(&pset->sched_lock);
1929 return;
1930 }
1931
1932 lastprocessor = processor;
1933 queue = &pset->active_queue;
1934 processor = (processor_t)queue_first(queue);
1935 while (!queue_end(queue, (queue_entry_t)processor)) {
1936 next = queue_next((queue_entry_t)processor);
1937
1938 if ( processor != myprocessor &&
1939 processor != lastprocessor &&
1940 (thread->sched_pri > processor->current_pri ||
1941 deadline < processor->deadline ) ) {
1942 if (!queue_end(queue, next)) {
1943 remqueue(queue, (queue_entry_t)processor);
1944 enqueue_tail(queue, (queue_entry_t)processor);
1945 }
1946 dispatch_counts.realtime_other++;
1947 cause_ast_check(processor);
1948 simple_unlock(&pset->sched_lock);
1949 return;
1950 }
1951
1952 processor = (processor_t)next;
1953 }
1954 }
1955 }
1956
1957 simple_unlock(&pset->sched_lock);
1958 }
1959
1960 /*
1961 * thread_setrun:
1962 *
1963  *	Dispatch thread for execution, directly onto an idle
1964  *	processor if possible.  Otherwise, put it on the appropriate
1965  *	run queue (the processor's local queue if bound, else the processor set's).
1966 *
1967 * Thread must be locked.
1968 */
1969 void
1970 thread_setrun(
1971 register thread_t new_thread,
1972 integer_t options)
1973 {
1974 register processor_t processor;
1975 register processor_set_t pset;
1976 register thread_t thread;
1977 ast_t preempt = (options & SCHED_PREEMPT)?
1978 AST_PREEMPT: AST_NONE;
1979
1980 assert(thread_runnable(new_thread));
1981
1982 /*
1983 * Update priority if needed.
1984 */
1985 if (new_thread->sched_stamp != sched_tick)
1986 update_priority(new_thread);
1987
1988 /*
1989 * Check for urgent preemption.
1990 */
1991 if (new_thread->sched_mode & TH_MODE_PREEMPT)
1992 preempt = (AST_PREEMPT | AST_URGENT);
1993
1994 assert(new_thread->runq == RUN_QUEUE_NULL);
1995
1996 if ((processor = new_thread->bound_processor) == PROCESSOR_NULL) {
1997 /*
1998 * First try to dispatch on
1999 * the last processor.
2000 */
2001 pset = new_thread->processor_set;
2002 processor = new_thread->last_processor;
2003 if ( pset->processor_count > 1 &&
2004 processor != PROCESSOR_NULL &&
2005 processor->state == PROCESSOR_IDLE ) {
2006 processor_lock(processor);
2007 simple_lock(&pset->sched_lock);
2008 if ( processor->processor_set == pset &&
2009 processor->state == PROCESSOR_IDLE ) {
2010 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2011 pset->idle_count--;
2012 processor->next_thread = new_thread;
2013 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2014 processor->deadline = new_thread->realtime.deadline;
2015 else
2016 processor->deadline = UINT64_MAX;
2017 processor->state = PROCESSOR_DISPATCHING;
2018 dispatch_counts.idle_pset_last++;
2019 simple_unlock(&pset->sched_lock);
2020 processor_unlock(processor);
2021 if (processor != current_processor())
2022 machine_signal_idle(processor);
2023 return;
2024 }
2025 processor_unlock(processor);
2026 }
2027 else
2028 simple_lock(&pset->sched_lock);
2029
2030 /*
2031 * Next pick any idle processor
2032 * in the processor set.
2033 */
2034 if (pset->idle_count > 0) {
2035 processor = (processor_t)dequeue_head(&pset->idle_queue);
2036 pset->idle_count--;
2037 processor->next_thread = new_thread;
2038 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2039 processor->deadline = new_thread->realtime.deadline;
2040 else
2041 processor->deadline = UINT64_MAX;
2042 processor->state = PROCESSOR_DISPATCHING;
2043 dispatch_counts.idle_pset_any++;
2044 simple_unlock(&pset->sched_lock);
2045 if (processor != current_processor())
2046 machine_signal_idle(processor);
2047 return;
2048 }
2049
2050 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2051 realtime_schedule_insert(pset, new_thread);
2052 else {
2053 if (!run_queue_enqueue(&pset->runq, new_thread, options))
2054 preempt = AST_NONE;
2055
2056 /*
2057 * Update the timesharing quanta.
2058 */
2059 timeshare_quanta_update(pset);
2060
2061 /*
2062 * Preempt check.
2063 */
2064 if (preempt != AST_NONE) {
2065 /*
2066 * First try the current processor
2067 * if it is a member of the correct
2068 * processor set.
2069 */
2070 processor = current_processor();
2071 thread = processor->active_thread;
2072 if ( pset == processor->processor_set &&
2073 csw_needed(thread, processor) ) {
2074 dispatch_counts.pset_self++;
2075 simple_unlock(&pset->sched_lock);
2076
2077 ast_on(preempt);
2078 return;
2079 }
2080
2081 /*
2082 * If that failed and we have other
2083 * processors available keep trying.
2084 */
2085 if ( pset->processor_count > 1 ||
2086 pset != processor->processor_set ) {
2087 queue_t queue = &pset->active_queue;
2088 processor_t myprocessor, lastprocessor;
2089 queue_entry_t next;
2090
2091 /*
2092 * Next try the last processor
2093 * dispatched on.
2094 */
2095 myprocessor = processor;
2096 processor = new_thread->last_processor;
2097 if ( processor != myprocessor &&
2098 processor != PROCESSOR_NULL &&
2099 processor->processor_set == pset &&
2100 processor->state == PROCESSOR_RUNNING &&
2101 new_thread->sched_pri > processor->current_pri ) {
2102 dispatch_counts.pset_last++;
2103 cause_ast_check(processor);
2104 simple_unlock(&pset->sched_lock);
2105 return;
2106 }
2107
2108 /*
2109 * Lastly, pick any other
2110 * available processor.
2111 */
2112 lastprocessor = processor;
2113 processor = (processor_t)queue_first(queue);
2114 while (!queue_end(queue, (queue_entry_t)processor)) {
2115 next = queue_next((queue_entry_t)processor);
2116
2117 if ( processor != myprocessor &&
2118 processor != lastprocessor &&
2119 new_thread->sched_pri >
2120 processor->current_pri ) {
2121 if (!queue_end(queue, next)) {
2122 remqueue(queue, (queue_entry_t)processor);
2123 enqueue_tail(queue, (queue_entry_t)processor);
2124 }
2125 dispatch_counts.pset_other++;
2126 cause_ast_check(processor);
2127 simple_unlock(&pset->sched_lock);
2128 return;
2129 }
2130
2131 processor = (processor_t)next;
2132 }
2133 }
2134 }
2135
2136 simple_unlock(&pset->sched_lock);
2137 }
2138 }
2139 else {
2140 /*
2141 * Bound, can only run on bound processor. Have to lock
2142 * processor here because it may not be the current one.
2143 */
2144 processor_lock(processor);
2145 pset = processor->processor_set;
2146 if (pset != PROCESSOR_SET_NULL) {
2147 simple_lock(&pset->sched_lock);
2148 if (processor->state == PROCESSOR_IDLE) {
2149 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2150 pset->idle_count--;
2151 processor->next_thread = new_thread;
2152 processor->deadline = UINT64_MAX;
2153 processor->state = PROCESSOR_DISPATCHING;
2154 dispatch_counts.idle_bound++;
2155 simple_unlock(&pset->sched_lock);
2156 processor_unlock(processor);
2157 if (processor != current_processor())
2158 machine_signal_idle(processor);
2159 return;
2160 }
2161 }
2162
2163 if (!run_queue_enqueue(&processor->runq, new_thread, options))
2164 preempt = AST_NONE;
2165
2166 if (preempt != AST_NONE) {
2167 if (processor == current_processor()) {
2168 thread = processor->active_thread;
2169 if (csw_needed(thread, processor)) {
2170 dispatch_counts.bound_self++;
2171 ast_on(preempt);
2172 }
2173 }
2174 else
2175 if ( processor->state == PROCESSOR_RUNNING &&
2176 new_thread->sched_pri > processor->current_pri ) {
2177 dispatch_counts.bound_other++;
2178 cause_ast_check(processor);
2179 }
2180 }
2181
2182 if (pset != PROCESSOR_SET_NULL)
2183 simple_unlock(&pset->sched_lock);
2184
2185 processor_unlock(processor);
2186 }
2187 }
2188
2189 /*
2190 * Check for a possible preemption point in
2191 * the (current) thread.
2192 *
2193 * Called at splsched.
2194 */
2195 ast_t
2196 csw_check(
2197 thread_t thread,
2198 processor_t processor)
2199 {
2200 int current_pri = thread->sched_pri;
2201 ast_t result = AST_NONE;
2202 run_queue_t runq;
2203
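/*
 * During the thread's first timeslice only a strictly higher-priority
 * runnable thread triggers preemption; after that an equal-priority
 * thread will also do, giving round-robin behavior within a priority
 * level.
 */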
2204 if (first_timeslice(processor)) {
2205 runq = &processor->processor_set->runq;
2206 if (runq->highq >= BASEPRI_RTQUEUES)
2207 return (AST_PREEMPT | AST_URGENT);
2208
2209 if (runq->highq > current_pri) {
2210 if (runq->urgency > 0)
2211 return (AST_PREEMPT | AST_URGENT);
2212
2213 result |= AST_PREEMPT;
2214 }
2215
2216 runq = &processor->runq;
2217 if (runq->highq > current_pri) {
2218 if (runq->urgency > 0)
2219 return (AST_PREEMPT | AST_URGENT);
2220
2221 result |= AST_PREEMPT;
2222 }
2223 }
2224 else {
2225 runq = &processor->processor_set->runq;
2226 if (runq->highq >= current_pri) {
2227 if (runq->urgency > 0)
2228 return (AST_PREEMPT | AST_URGENT);
2229
2230 result |= AST_PREEMPT;
2231 }
2232
2233 runq = &processor->runq;
2234 if (runq->highq >= current_pri) {
2235 if (runq->urgency > 0)
2236 return (AST_PREEMPT | AST_URGENT);
2237
2238 result |= AST_PREEMPT;
2239 }
2240 }
2241
2242 if (result != AST_NONE)
2243 return (result);
2244
2245 if (thread->state & TH_SUSP)
2246 result |= AST_PREEMPT;
2247
2248 return (result);
2249 }
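/*
 * Illustrative caller pattern (cf. set_sched_pri below) -- the result
 * is simply posted as an AST on the current processor.  Sketch only:
 *
 *	ast_t preempt = csw_check(current_thread(), current_processor());
 *	if (preempt != AST_NONE)
 *		ast_on(preempt);
 */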
2250
2251 /*
2252 * set_sched_pri:
2253 *
2254 * Set the scheduled priority of the specified thread.
2255 *
2256 * This may cause the thread to change queues.
2257 *
2258 * Thread must be locked.
2259 */
2260 void
2261 set_sched_pri(
2262 thread_t thread,
2263 int priority)
2264 {
2265 register struct run_queue *rq = run_queue_remove(thread);
2266
2267 if ( !(thread->sched_mode & TH_MODE_TIMESHARE) &&
2268 (priority >= BASEPRI_PREEMPT ||
2269 (thread->task_priority < MINPRI_KERNEL &&
2270 thread->task_priority >= BASEPRI_BACKGROUND &&
2271 priority > thread->task_priority) ||
2272 (thread->sched_mode & TH_MODE_FORCEDPREEMPT) ) )
2273 thread->sched_mode |= TH_MODE_PREEMPT;
2274 else
2275 thread->sched_mode &= ~TH_MODE_PREEMPT;
2276
2277 thread->sched_pri = priority;
2278 if (rq != RUN_QUEUE_NULL)
2279 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2280 else
2281 if (thread->state & TH_RUN) {
2282 processor_t processor = thread->last_processor;
2283
2284 if (thread == current_thread()) {
2285 ast_t preempt = csw_check(thread, processor);
2286
2287 if (preempt != AST_NONE)
2288 ast_on(preempt);
2289 processor->current_pri = priority;
2290 }
2291 else
2292 if ( processor != PROCESSOR_NULL &&
2293 processor->active_thread == thread )
2294 cause_ast_check(processor);
2295 }
2296 }
2297
2298 /*
2299 * run_queue_remove:
2300 *
2301 * Remove a thread from its current run queue and
2302 * return the run queue if successful.
2303 *
2304 * Thread must be locked.
2305 */
2306 run_queue_t
2307 run_queue_remove(
2308 thread_t thread)
2309 {
2310 register run_queue_t rq = thread->runq;
2311
2312 /*
2313 * If rq is RUN_QUEUE_NULL, the thread will stay out of the
2314 * run queues because the caller locked the thread. Otherwise
2315 * the thread is on a run queue, but could be chosen for dispatch
2316 * and removed.
2317 */
2318 if (rq != RUN_QUEUE_NULL) {
2319 processor_set_t pset = thread->processor_set;
2320 processor_t processor = thread->bound_processor;
2321
2322 /*
2323 * The run queues are locked by the pset scheduling
2324 * lock, except that when a processor is off-line its
2325 * local run queue is locked by the processor lock.
2326 */
2327 if (processor != PROCESSOR_NULL) {
2328 processor_lock(processor);
2329 pset = processor->processor_set;
2330 }
2331
2332 if (pset != PROCESSOR_SET_NULL)
2333 simple_lock(&pset->sched_lock);
2334
2335 if (rq == thread->runq) {
2336 /*
2337 * Thread is on a run queue and we have a lock on
2338 * that run queue.
2339 */
2340 remqueue(&rq->queues[0], (queue_entry_t)thread);
2341 rq->count--;
2342 if (thread->sched_mode & TH_MODE_PREEMPT)
2343 rq->urgency--;
2344 assert(rq->urgency >= 0);
2345
2346 if (queue_empty(rq->queues + thread->sched_pri)) {
2347 /* update run queue status */
2348 if (thread->sched_pri != IDLEPRI)
2349 clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
2350 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2351 }
2352
2353 thread->runq = RUN_QUEUE_NULL;
2354 }
2355 else {
2356 /*
2357 * The thread left the run queue before we could
2358 * lock the run queue.
2359 */
2360 assert(thread->runq == RUN_QUEUE_NULL);
2361 rq = RUN_QUEUE_NULL;
2362 }
2363
2364 if (pset != PROCESSOR_SET_NULL)
2365 simple_unlock(&pset->sched_lock);
2366
2367 if (processor != PROCESSOR_NULL)
2368 processor_unlock(processor);
2369 }
2370
2371 return (rq);
2372 }
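/*
 * Illustrative use (cf. set_sched_pri above) -- pull the thread off its
 * run queue, change its scheduled priority, then requeue it if it was
 * runnable.  Sketch only:
 *
 *	run_queue_t rq = run_queue_remove(thread);
 *	thread->sched_pri = priority;
 *	if (rq != RUN_QUEUE_NULL)
 *		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
 */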
2373
2374 /*
2375 * choose_thread:
2376 *
2377 * Choose a thread to execute, remove it from the
2378 * run queues, and return it.
2379 *
2380 * Called with pset scheduling lock held.
2381 */
2382 static thread_t
2383 choose_thread(
2384 processor_set_t pset,
2385 processor_t processor)
2386 {
2387 register run_queue_t runq;
2388 register thread_t thread;
2389 register queue_t q;
2390
2391 runq = &processor->runq;
2392
2393 if (runq->count > 0 && runq->highq >= pset->runq.highq) {
2394 q = runq->queues + runq->highq;
2395
2396 thread = (thread_t)q->next;
2397 ((queue_entry_t)thread)->next->prev = q;
2398 q->next = ((queue_entry_t)thread)->next;
2399 thread->runq = RUN_QUEUE_NULL;
2400 runq->count--;
2401 if (thread->sched_mode & TH_MODE_PREEMPT)
2402 runq->urgency--;
2403 assert(runq->urgency >= 0);
2404 if (queue_empty(q)) {
2405 if (runq->highq != IDLEPRI)
2406 clrbit(MAXPRI - runq->highq, runq->bitmap);
2407 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2408 }
2409
2410 processor->deadline = UINT64_MAX;
2411
2412 return (thread);
2413 }
2414
2415 runq = &pset->runq;
2416
2417 assert(runq->count > 0);
2418 q = runq->queues + runq->highq;
2419
2420 thread = (thread_t)q->next;
2421 ((queue_entry_t)thread)->next->prev = q;
2422 q->next = ((queue_entry_t)thread)->next;
2423 thread->runq = RUN_QUEUE_NULL;
2424 runq->count--;
2425 if (runq->highq >= BASEPRI_RTQUEUES)
2426 processor->deadline = thread->realtime.deadline;
2427 else
2428 processor->deadline = UINT64_MAX;
2429 if (thread->sched_mode & TH_MODE_PREEMPT)
2430 runq->urgency--;
2431 assert(runq->urgency >= 0);
2432 if (queue_empty(q)) {
2433 if (runq->highq != IDLEPRI)
2434 clrbit(MAXPRI - runq->highq, runq->bitmap);
2435 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2436 }
2437
2438 timeshare_quanta_update(pset);
2439
2440 return (thread);
2441 }
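/*
 * Run queue bookkeeping note (applies here and in run_queue_remove):
 * rq->bitmap has a bit set for every non-empty priority level and
 * rq->highq caches the highest such level.  When the last thread at a
 * level is removed, its bit is cleared (IDLEPRI always stays set) and
 * highq is recomputed with ffsbit().
 */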
2442
2443 /*
2444 * no_dispatch_count counts the number of times processors go non-idle
2445 * without being dispatched. This should be very rare.
2446 */
2447 int no_dispatch_count = 0;
2448
2449 /*
2450 * This is the idle thread, which just looks for other threads
2451 * to execute.
2452 */
2453 void
2454 idle_thread_continue(void)
2455 {
2456 register processor_t processor;
2457 register volatile thread_t *threadp;
2458 register volatile int *gcount;
2459 register volatile int *lcount;
2460 register thread_t new_thread;
2461 register int state;
2462 register processor_set_t pset;
2463 int mycpu;
2464
2465 mycpu = cpu_number();
2466 processor = cpu_to_processor(mycpu);
2467 threadp = (volatile thread_t *) &processor->next_thread;
2468 lcount = (volatile int *) &processor->runq.count;
2469
2470 gcount = (volatile int *)&processor->processor_set->runq.count;
2471
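/*
 * Spin at splsched until either a thread has been handed off to this
 * processor (next_thread) or work appears on the global (pset) or
 * local run queue, idling the machine in between.
 */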
2472 (void)splsched();
2473 while ( (*threadp == (volatile thread_t)THREAD_NULL) &&
2474 (*gcount == 0) && (*lcount == 0) ) {
2475
2476 /* check for ASTs while we wait */
2477 if (need_ast[mycpu] &~ ( AST_SCHEDULING | AST_BSD )) {
2478 /* no ASTs for us */
2479 need_ast[mycpu] &= AST_NONE;
2480 (void)spllo();
2481 }
2482 else
2483 machine_idle();
2484
2485 (void)splsched();
2486 }
2487
2488 /*
2489 * This is written as an if/else chain rather than a switch
2490 * to avoid the bounds-checking code in the common case.
2491 */
2492 pset = processor->processor_set;
2493 simple_lock(&pset->sched_lock);
2494
2495 state = processor->state;
2496 if (state == PROCESSOR_DISPATCHING) {
2497 /*
2498 * Common case -- cpu dispatched.
2499 */
2500 new_thread = *threadp;
2501 *threadp = (volatile thread_t) THREAD_NULL;
2502 processor->state = PROCESSOR_RUNNING;
2503 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2504
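/*
 * If a realtime thread with an earlier deadline arrived on the pset
 * run queue after the handoff, run it instead and send the handed-off
 * thread back through thread_setrun().
 */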
2505 if ( pset->runq.highq >= BASEPRI_RTQUEUES &&
2506 new_thread->sched_pri >= BASEPRI_RTQUEUES ) {
2507 register run_queue_t runq = &pset->runq;
2508 register queue_t q;
2509
2510 q = runq->queues + runq->highq;
2511 if (((thread_t)q->next)->realtime.deadline <
2512 processor->deadline) {
2513 thread_t thread = new_thread;
2514
2515 new_thread = (thread_t)q->next;
2516 ((queue_entry_t)new_thread)->next->prev = q;
2517 q->next = ((queue_entry_t)new_thread)->next;
2518 new_thread->runq = RUN_QUEUE_NULL;
2519 processor->deadline = new_thread->realtime.deadline;
2520 assert(new_thread->sched_mode & TH_MODE_PREEMPT);
2521 runq->count--; runq->urgency--;
2522 if (queue_empty(q)) {
2523 if (runq->highq != IDLEPRI)
2524 clrbit(MAXPRI - runq->highq, runq->bitmap);
2525 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2526 }
2527 dispatch_counts.missed_realtime++;
2528 simple_unlock(&pset->sched_lock);
2529
2530 thread_lock(thread);
2531 thread_setrun(thread, SCHED_HEADQ);
2532 thread_unlock(thread);
2533
2534 counter(c_idle_thread_handoff++);
2535 thread_run(processor->idle_thread,
2536 idle_thread_continue, new_thread);
2537 /*NOTREACHED*/
2538 }
2539 simple_unlock(&pset->sched_lock);
2540
2541 counter(c_idle_thread_handoff++);
2542 thread_run(processor->idle_thread,
2543 idle_thread_continue, new_thread);
2544 /*NOTREACHED*/
2545 }
2546
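/*
 * Similarly, if a higher-priority thread has become runnable on either
 * run queue, choose it instead and requeue the handed-off thread.
 */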
2547 if ( processor->runq.highq > new_thread->sched_pri ||
2548 pset->runq.highq > new_thread->sched_pri ) {
2549 thread_t thread = new_thread;
2550
2551 new_thread = choose_thread(pset, processor);
2552 dispatch_counts.missed_other++;
2553 simple_unlock(&pset->sched_lock);
2554
2555 thread_lock(thread);
2556 thread_setrun(thread, SCHED_HEADQ);
2557 thread_unlock(thread);
2558
2559 counter(c_idle_thread_handoff++);
2560 thread_run(processor->idle_thread,
2561 idle_thread_continue, new_thread);
2562 /* NOTREACHED */
2563 }
2564 else {
2565 simple_unlock(&pset->sched_lock);
2566
2567 counter(c_idle_thread_handoff++);
2568 thread_run(processor->idle_thread,
2569 idle_thread_continue, new_thread);
2570 /* NOTREACHED */
2571 }
2572 }
2573 else
2574 if (state == PROCESSOR_IDLE) {
2575 /*
2576 * Processor was not dispatched (rare).
2577 * Set it running again and force a
2578 * reschedule.
2579 */
2580 no_dispatch_count++;
2581 pset->idle_count--;
2582 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2583 processor->state = PROCESSOR_RUNNING;
2584 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2585 simple_unlock(&pset->sched_lock);
2586
2587 counter(c_idle_thread_block++);
2588 thread_block(idle_thread_continue);
2589 /* NOTREACHED */
2590 }
2591 else
2592 if (state == PROCESSOR_SHUTDOWN) {
2593 /*
2594 * Going off-line. Force a
2595 * reschedule.
2596 */
2597 if ((new_thread = (thread_t)*threadp) != THREAD_NULL) {
2598 *threadp = (volatile thread_t) THREAD_NULL;
2599 processor->deadline = UINT64_MAX;
2600 simple_unlock(&pset->sched_lock);
2601
2602 thread_lock(new_thread);
2603 thread_setrun(new_thread, SCHED_HEADQ);
2604 thread_unlock(new_thread);
2605 }
2606 else
2607 simple_unlock(&pset->sched_lock);
2608
2609 counter(c_idle_thread_block++);
2610 thread_block(idle_thread_continue);
2611 /* NOTREACHED */
2612 }
2613
2614 simple_unlock(&pset->sched_lock);
2615
2616 panic("idle_thread: state %d\n", cpu_state(mycpu));
2617 /*NOTREACHED*/
2618 }
2619
2620 void
2621 idle_thread(void)
2622 {
2623 counter(c_idle_thread_block++);
2624 thread_block(idle_thread_continue);
2625 /*NOTREACHED*/
2626 }
2627
2628 static uint64_t sched_tick_deadline;
2629
2630 void sched_tick_thread(void);
2631
2632 void
2633 sched_tick_init(void)
2634 {
2635 kernel_thread_with_priority(sched_tick_thread, MAXPRI_STANDARD);
2636 }
2637
2638 /*
2639 * sched_tick_thread
2640 *
2641 * Perform periodic bookkeeping functions about ten
2642 * times per second.
2643 */
2644 void
2645 sched_tick_thread_continue(void)
2646 {
2647 uint64_t abstime;
2648 #if SIMPLE_CLOCK
2649 int new_usec;
2650 #endif /* SIMPLE_CLOCK */
2651
2652 abstime = mach_absolute_time();
2653
2654 sched_tick++; /* age usage one more time */
2655 #if SIMPLE_CLOCK
2656 /*
2657 * Compensate for clock drift. sched_usec is an
2658 * exponential average of the number of microseconds in
2659 * a second. It decays in the same fashion as cpu_usage.
2660 */
2661 new_usec = sched_usec_elapsed();
2662 sched_usec = (5*sched_usec + 3*new_usec)/8;
2663 #endif /* SIMPLE_CLOCK */
2664
2665 /*
2666 * Compute the scheduler load factors.
2667 */
2668 compute_mach_factor();
2669
2670 /*
2671 * Scan the run queues for timesharing threads which
2672 * may need to have their priorities recalculated.
2673 */
2674 do_thread_scan();
2675
2676 clock_deadline_for_periodic_event(sched_tick_interval, abstime,
2677 &sched_tick_deadline);
2678
2679 assert_wait((event_t)sched_tick_thread_continue, THREAD_INTERRUPTIBLE);
2680 thread_set_timer_deadline(sched_tick_deadline);
2681 thread_block(sched_tick_thread_continue);
2682 /*NOTREACHED*/
2683 }
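/*
 * The next deadline is derived from the previous one plus
 * sched_tick_interval (via clock_deadline_for_periodic_event), which
 * keeps the roughly ten-per-second tick from drifting when a pass
 * runs late.
 */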
2684
2685 void
2686 sched_tick_thread(void)
2687 {
2688 sched_tick_deadline = mach_absolute_time();
2689
2690 thread_block(sched_tick_thread_continue);
2691 /*NOTREACHED*/
2692 }
2693
2694 /*
2695 * do_thread_scan:
2696 *
2697 * Scan the run queues for timesharing threads which need
2698 * to be aged, possibly adjusting their priorities upwards.
2699 *
2700 * The scanner runs in two passes.  Pass one squirrels likely
2701 * threads away in an array (taking out a reference on each).
2702 * Pass two does the priority updates. This is necessary because
2703 * the run queue lock is required for the candidate scan, but
2704 * cannot be held during updates.
2705 *
2706 * The array should be long enough that a restart is never
2707 * necessary, but restart logic is included anyway.
2708 *
2709 */
2710
2711 #define MAX_STUCK_THREADS 128
2712
2713 static thread_t stuck_threads[MAX_STUCK_THREADS];
2714 static int stuck_count = 0;
2715
2716 /*
2717 * do_runq_scan is the guts of pass 1.  It scans a run queue
2718 * for stuck threads and returns a boolean indicating whether
2719 * a retry is needed.
2720 */
2721 static boolean_t
2722 do_runq_scan(
2723 run_queue_t runq)
2724 {
2725 register queue_t q;
2726 register thread_t thread;
2727 register int count;
2728 boolean_t result = FALSE;
2729
2730 if ((count = runq->count) > 0) {
2731 q = runq->queues + runq->highq;
2732 while (count > 0) {
2733 queue_iterate(q, thread, thread_t, links) {
2734 if ( thread->sched_stamp != sched_tick &&
2735 (thread->sched_mode & TH_MODE_TIMESHARE) ) {
2736 /*
2737 * Stuck, save its id for later.
2738 */
2739 if (stuck_count == MAX_STUCK_THREADS) {
2740 /*
2741 * No more room -- force a rescan.
2742 */
2743 return (TRUE);
2744 }
2745
2746 if (thread_lock_try(thread)) {
2747 thread->ref_count++;
2748 thread_unlock(thread);
2749 stuck_threads[stuck_count++] = thread;
2750 }
2751 else
2752 result = TRUE;
2753 }
2754
2755 count--;
2756 }
2757
2758 q--;
2759 }
2760 }
2761
2762 return (result);
2763 }
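/*
 * Reference counting: pass 1 takes a reference on each candidate
 * (thread->ref_count++ above); pass 2 in do_thread_scan drops it with
 * thread_deallocate().  Idle threads are queued by do_thread_scan
 * itself without a reference, so they are skipped at deallocation.
 */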
2764
2765 boolean_t thread_scan_enabled = TRUE;
2766
2767 static void
2768 do_thread_scan(void)
2769 {
2770 register boolean_t restart_needed = FALSE;
2771 register thread_t thread;
2772 register processor_set_t pset = &default_pset;
2773 register processor_t processor;
2774 spl_t s;
2775
2776 if (!thread_scan_enabled)
2777 return;
2778
2779 do {
2780 s = splsched();
2781 simple_lock(&pset->sched_lock);
2782 restart_needed = do_runq_scan(&pset->runq);
2783 simple_unlock(&pset->sched_lock);
2784
2785 if (!restart_needed) {
2786 simple_lock(&pset->sched_lock);
2787 processor = (processor_t)queue_first(&pset->processors);
2788 while (!queue_end(&pset->processors, (queue_entry_t)processor)) {
2789 if ((restart_needed = do_runq_scan(&processor->runq)) != FALSE)
2790 break;
2791
2792 thread = processor->idle_thread;
2793 if (thread->sched_stamp != sched_tick) {
2794 if (stuck_count == MAX_STUCK_THREADS) {
2795 restart_needed = TRUE;
2796 break;
2797 }
2798
2799 stuck_threads[stuck_count++] = thread;
2800 }
2801
2802 processor = (processor_t)queue_next(&processor->processors);
2803 }
2804 simple_unlock(&pset->sched_lock);
2805 }
2806 splx(s);
2807
2808 /*
2809 * Ok, we now have a collection of candidates -- fix them.
2810 */
2811 while (stuck_count > 0) {
2812 boolean_t idle_thread;
2813
2814 thread = stuck_threads[--stuck_count];
2815 stuck_threads[stuck_count] = THREAD_NULL;
2816
2817 s = splsched();
2818 thread_lock(thread);
2819 idle_thread = (thread->state & TH_IDLE) != 0;
2820 if ( !(thread->state & (TH_WAIT|TH_SUSP)) &&
2821 thread->sched_stamp != sched_tick )
2822 update_priority(thread);
2823 thread_unlock(thread);
2824 splx(s);
2825
2826 if (!idle_thread)
2827 thread_deallocate(thread);
2828 }
2829
2830 if (restart_needed)
2831 delay(1); /* XXX */
2832
2833 } while (restart_needed);
2834 }
2835
2836 /*
2837 * Provide a real function in case someone doesn't use the macro.
2838 */
2839 #undef thread_wakeup
2840 void
2841 thread_wakeup(
2842 event_t x);
2843
2844 void
2845 thread_wakeup(
2846 event_t x)
2847 {
2848 thread_wakeup_with_result(x, THREAD_AWAKENED);
2849 }
2850
2851
2852 #if DEBUG
2853 static boolean_t
2854 thread_runnable(
2855 thread_t thread)
2856 {
2857 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
2858 }
2859 #endif /* DEBUG */
2860
2861 #if MACH_KDB
2862 #include <ddb/db_output.h>
2863 #define printf kdbprintf
2864 extern int db_indent;
2865 void db_sched(void);
2866
2867 void
2868 db_sched(void)
2869 {
2870 iprintf("Scheduling Statistics:\n");
2871 db_indent += 2;
2872 iprintf("Thread invocations: csw %d same %d\n",
2873 c_thread_invoke_csw, c_thread_invoke_same);
2874 #if MACH_COUNTERS
2875 iprintf("Thread block: calls %d\n",
2876 c_thread_block_calls);
2877 iprintf("Idle thread:\n\thandoff %d block %d no_dispatch %d\n",
2878 c_idle_thread_handoff,
2879 c_idle_thread_block, no_dispatch_count);
2880 iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
2881 #endif /* MACH_COUNTERS */
2882 db_indent -= 2;
2883 }
2884
2885 #include <ddb/db_output.h>
2886 void db_show_thread_log(void);
2887
2888 void
2889 db_show_thread_log(void)
2890 {
2891 }
2892 #endif /* MACH_KDB */