1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_FREE_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: sched_prim.c
54 * Author: Avadis Tevanian, Jr.
55 * Date: 1986
56 *
57 * Scheduling primitives
58 *
59 */
60
61 #include <debug.h>
62 #include <mach_kdb.h>
63
64 #include <ddb/db_output.h>
65
66 #include <mach/mach_types.h>
67 #include <mach/machine.h>
68 #include <mach/policy.h>
69 #include <mach/sync_policy.h>
70
71 #include <machine/machine_routines.h>
72 #include <machine/sched_param.h>
73 #include <machine/machine_cpu.h>
74
75 #include <kern/kern_types.h>
76 #include <kern/clock.h>
77 #include <kern/counters.h>
78 #include <kern/cpu_number.h>
79 #include <kern/cpu_data.h>
80 #include <kern/debug.h>
81 #include <kern/lock.h>
82 #include <kern/macro_help.h>
83 #include <kern/machine.h>
84 #include <kern/misc_protos.h>
85 #include <kern/processor.h>
86 #include <kern/queue.h>
87 #include <kern/sched.h>
88 #include <kern/sched_prim.h>
89 #include <kern/syscall_subr.h>
90 #include <kern/task.h>
91 #include <kern/thread.h>
92 #include <kern/wait_queue.h>
93
94 #include <vm/pmap.h>
95 #include <vm/vm_kern.h>
96 #include <vm/vm_map.h>
97
98 #include <sys/kdebug.h>
99
100 #include <kern/pms.h>
101
102 #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
103 int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
104
105 #define MAX_UNSAFE_QUANTA 800
106 int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
107
108 #define MAX_POLL_QUANTA 2
109 int max_poll_quanta = MAX_POLL_QUANTA;
110
111 #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
112 int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
113
114 uint64_t max_unsafe_computation;
115 uint32_t sched_safe_duration;
116 uint64_t max_poll_computation;
117
118 uint32_t std_quantum;
119 uint32_t min_std_quantum;
120
121 uint32_t std_quantum_us;
122
123 uint32_t max_rt_quantum;
124 uint32_t min_rt_quantum;
125
126 uint32_t sched_cswtime;
127
128 static uint32_t delay_idle_limit, delay_idle_spin;
129 static processor_t delay_idle(
130 processor_t processor,
131 thread_t self);
132
133 unsigned sched_tick;
134 uint32_t sched_tick_interval;
135
136 uint32_t sched_pri_shift;
137
138 /* Forwards */
139 void wait_queues_init(void);
140
141 static void load_shift_init(void);
142
143 static thread_t choose_thread(
144 processor_set_t pset,
145 processor_t processor);
146
147 static void thread_update_scan(void);
148
149 #if DEBUG
150 static
151 boolean_t thread_runnable(
152 thread_t thread);
153
154 #endif /*DEBUG*/
155
156
157 /*
158 * State machine
159 *
160 * states are combinations of:
161 * R running
162 * W waiting (or on wait queue)
163 * N non-interruptible
164 * O swapped out
165 * I being swapped in
166 *
167 * init action
168 * assert_wait thread_block clear_wait swapout swapin
169 *
170 * R RW, RWN R; setrun - -
171 * RN RWN RN; setrun - -
172 *
173 * RW W R -
174 * RWN WN RN -
175 *
176 * W R; setrun WO
177 * WN RN; setrun -
178 *
179 * RO - - R
180 *
181 */
182
183 /*
184 * Waiting protocols and implementation:
185 *
186 * Each thread may be waiting for exactly one event; this event
187 * is set using assert_wait(). That thread may be awakened either
188 * by performing a thread_wakeup_prim() on its event,
189 * or by directly waking that thread up with clear_wait().
190 *
191 * The implementation of wait events uses a hash table. Each
192 * bucket is a queue of threads having the same hash function
193 * value; the chain for the queue (linked list) is the run queue
194 * field. [It is not possible to be waiting and runnable at the
195 * same time.]
196 *
197 * Locks on both the thread and on the hash buckets govern the
198 * wait event field and the queue chain field. Because wakeup
199 * operations only have the event as an argument, the event hash
200 * bucket must be locked before any thread.
201 *
202 * Scheduling operations may also occur at interrupt level; therefore,
203 * interrupts below splsched() must be prevented when holding
204 * thread or hash bucket locks.
205 *
206 * The wait event hash table declarations are as follows:
207 */
208
209 #define NUMQUEUES 59
210
211 struct wait_queue wait_queues[NUMQUEUES];
212
213 #define wait_hash(event) \
214 ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
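/*
 * wait_hash() folds an arbitrary event address into one of the NUMQUEUES
 * buckets; a value that is negative as an int is one's-complemented first
 * so the modulus result stays non-negative.
 */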
215
216 int8_t sched_load_shifts[NRQS];
217
218 void
219 sched_init(void)
220 {
221 /*
222 * Calculate the timeslicing quantum
223 * in us.
224 */
225 if (default_preemption_rate < 1)
226 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
227 std_quantum_us = (1000 * 1000) / default_preemption_rate;
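/* e.g. the default rate of 100 preemptions per second yields a 10000 us (10 ms) quantum */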
228
229 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
230
231 sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
232 (1 << SCHED_TICK_SHIFT);
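/*
 * sched_safe_duration is expressed in scheduler ticks: twice the
 * unsafe-quanta budget converted to seconds (16 s with the defaults
 * above), multiplied by the (1 << SCHED_TICK_SHIFT) ticks per second.
 */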
233
234 wait_queues_init();
235 load_shift_init();
236 pset_init(&default_pset);
237 sched_tick = 0;
238 ast_init();
239 }
240
241 void
242 sched_timebase_init(void)
243 {
244 uint64_t abstime;
245 uint32_t shift;
246
247 /* standard timeslicing quantum */
248 clock_interval_to_absolutetime_interval(
249 std_quantum_us, NSEC_PER_USEC, &abstime);
250 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
251 std_quantum = abstime;
252
253 /* smallest remaining quantum (250 us) */
254 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
255 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
256 min_std_quantum = abstime;
257
259 /* smallest rt computation (50 us) */
259 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
260 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
261 min_rt_quantum = abstime;
262
263 /* maximum rt computation (50 ms) */
264 clock_interval_to_absolutetime_interval(
265 50, 1000*NSEC_PER_USEC, &abstime);
266 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
267 max_rt_quantum = abstime;
268
269 /* scheduler tick interval */
270 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
271 NSEC_PER_USEC, &abstime);
272 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
273 sched_tick_interval = abstime;
274
275 /*
276 * Compute conversion factor from usage to
277 * timesharing priorities with 5/8 ** n aging.
278 */
279 abstime = (abstime * 5) / 3;
280 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
281 abstime >>= 1;
282 sched_pri_shift = shift;
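/*
 * sched_pri_shift is the power-of-two divisor used elsewhere in the
 * scheduler to scale a thread's accumulated timesharing usage down
 * into the priority range (roughly usage >> sched_pri_shift points
 * of priority depression).
 */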
283
284 max_unsafe_computation = max_unsafe_quanta * std_quantum;
285 max_poll_computation = max_poll_quanta * std_quantum;
286
287 /* delay idle constant(s) (60, 1 us) */
288 clock_interval_to_absolutetime_interval(60, NSEC_PER_USEC, &abstime);
289 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
290 delay_idle_limit = abstime;
291
292 clock_interval_to_absolutetime_interval(1, NSEC_PER_USEC, &abstime);
293 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
294 delay_idle_spin = abstime;
295 }
296
297 void
298 wait_queues_init(void)
299 {
300 register int i;
301
302 for (i = 0; i < NUMQUEUES; i++) {
303 wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
304 }
305 }
306
307 /*
308 * Set up values for timeshare
309 * loading factors.
310 */
311 static void
312 load_shift_init(void)
313 {
314 int8_t k, *p = sched_load_shifts;
315 uint32_t i, j;
316
317 *p++ = INT8_MIN; *p++ = 0;
318
319 for (i = j = 2, k = 1; i < NRQS; ++k) {
320 for (j <<= 1; i < j; ++i)
321 *p++ = k;
322 }
323 }
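/*
 * On return the table holds INT8_MIN in slot 0 and floor(log2(i)) for
 * every i >= 1: 1 -> 0, 2-3 -> 1, 4-7 -> 2, 8-15 -> 3, and so on up
 * to NRQS - 1.
 */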
324
325 /*
326 * Thread wait timer expiration.
327 */
328 void
329 thread_timer_expire(
330 void *p0,
331 __unused void *p1)
332 {
333 thread_t thread = p0;
334 spl_t s;
335
336 s = splsched();
337 thread_lock(thread);
338 if (--thread->wait_timer_active == 0) {
339 if (thread->wait_timer_is_set) {
340 thread->wait_timer_is_set = FALSE;
341 clear_wait_internal(thread, THREAD_TIMED_OUT);
342 }
343 }
344 thread_unlock(thread);
345 splx(s);
346 }
347
348 /*
349 * thread_set_timer:
350 *
351 * Set a timer for the current thread, if the thread
352 * is ready to wait. Must be called between assert_wait()
353 * and thread_block().
354 */
355 void
356 thread_set_timer(
357 uint32_t interval,
358 uint32_t scale_factor)
359 {
360 thread_t thread = current_thread();
361 uint64_t deadline;
362 spl_t s;
363
364 s = splsched();
365 thread_lock(thread);
366 if ((thread->state & TH_WAIT) != 0) {
367 clock_interval_to_deadline(interval, scale_factor, &deadline);
368 if (!timer_call_enter(&thread->wait_timer, deadline))
369 thread->wait_timer_active++;
370 thread->wait_timer_is_set = TRUE;
371 }
372 thread_unlock(thread);
373 splx(s);
374 }
375
376 void
377 thread_set_timer_deadline(
378 uint64_t deadline)
379 {
380 thread_t thread = current_thread();
381 spl_t s;
382
383 s = splsched();
384 thread_lock(thread);
385 if ((thread->state & TH_WAIT) != 0) {
386 if (!timer_call_enter(&thread->wait_timer, deadline))
387 thread->wait_timer_active++;
388 thread->wait_timer_is_set = TRUE;
389 }
390 thread_unlock(thread);
391 splx(s);
392 }
393
394 void
395 thread_cancel_timer(void)
396 {
397 thread_t thread = current_thread();
398 spl_t s;
399
400 s = splsched();
401 thread_lock(thread);
402 if (thread->wait_timer_is_set) {
403 if (timer_call_cancel(&thread->wait_timer))
404 thread->wait_timer_active--;
405 thread->wait_timer_is_set = FALSE;
406 }
407 thread_unlock(thread);
408 splx(s);
409 }
410
411 /*
412 * thread_unblock:
413 *
414 * Unblock thread on wake up.
415 *
416 * Returns TRUE if the thread is still running.
417 *
418 * Thread must be locked.
419 */
420 boolean_t
421 thread_unblock(
422 thread_t thread,
423 wait_result_t wresult)
424 {
425 boolean_t result = FALSE;
426
427 /*
428 * Set wait_result.
429 */
430 thread->wait_result = wresult;
431
432 /*
433 * Cancel pending wait timer.
434 */
435 if (thread->wait_timer_is_set) {
436 if (timer_call_cancel(&thread->wait_timer))
437 thread->wait_timer_active--;
438 thread->wait_timer_is_set = FALSE;
439 }
440
441 /*
442 * Update scheduling state.
443 */
444 thread->state &= ~(TH_WAIT|TH_UNINT);
445
446 if (!(thread->state & TH_RUN)) {
447 thread->state |= TH_RUN;
448
449 /*
450 * Mark unblocked if call out.
451 */
452 if (thread->options & TH_OPT_CALLOUT)
453 call_thread_unblock();
454
455 /*
456 * Update pset run counts.
457 */
458 pset_run_incr(thread->processor_set);
459 if (thread->sched_mode & TH_MODE_TIMESHARE)
460 pset_share_incr(thread->processor_set);
461 }
462 else
463 result = TRUE;
464
465 /*
466 * Calculate deadline for real-time threads.
467 */
468 if (thread->sched_mode & TH_MODE_REALTIME) {
469 thread->realtime.deadline = mach_absolute_time();
470 thread->realtime.deadline += thread->realtime.constraint;
471 }
472
473 /*
474 * Clear old quantum, fail-safe computation, etc.
475 */
476 thread->current_quantum = 0;
477 thread->computation_metered = 0;
478 thread->reason = AST_NONE;
479
480 KERNEL_DEBUG_CONSTANT(
481 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
482 (int)thread, (int)thread->sched_pri, 0, 0, 0);
483
484 return (result);
485 }
486
487 /*
488 * Routine: thread_go
489 * Purpose:
490 * Unblock and dispatch thread.
491 * Conditions:
492 * thread lock held, IPC locks may be held.
493 * thread must have been pulled from wait queue under same lock hold.
494 * Returns:
495 * KERN_SUCCESS - Thread was set running
496 * KERN_NOT_WAITING - Thread was not waiting
497 */
498 kern_return_t
499 thread_go(
500 thread_t thread,
501 wait_result_t wresult)
502 {
503 assert(thread->at_safe_point == FALSE);
504 assert(thread->wait_event == NO_EVENT64);
505 assert(thread->wait_queue == WAIT_QUEUE_NULL);
506
507 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
508 if (!thread_unblock(thread, wresult))
509 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
510
511 return (KERN_SUCCESS);
512 }
513
514 return (KERN_NOT_WAITING);
515 }
516
517 /*
518 * Routine: thread_mark_wait_locked
519 * Purpose:
520 * Mark a thread as waiting. If, given the circumstances,
521 * it doesn't want to wait (i.e. already aborted), then
522 * indicate that in the return value.
523 * Conditions:
524 * at splsched() and thread is locked.
525 */
526 __private_extern__
527 wait_result_t
528 thread_mark_wait_locked(
529 thread_t thread,
530 wait_interrupt_t interruptible)
531 {
532 boolean_t at_safe_point;
533
534 /*
535 * The thread may have certain types of interrupts/aborts masked
536 * off. Even if the wait location says these types of interrupts
537 * are OK, we have to honor mask settings (outer-scoped code may
538 * not be able to handle aborts at the moment).
539 */
540 if (interruptible > (thread->options & TH_OPT_INTMASK))
541 interruptible = thread->options & TH_OPT_INTMASK;
542
543 at_safe_point = (interruptible == THREAD_ABORTSAFE);
544
545 if ( interruptible == THREAD_UNINT ||
546 !(thread->state & TH_ABORT) ||
547 (!at_safe_point &&
548 (thread->state & TH_ABORT_SAFELY)) ) {
549 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
550 thread->at_safe_point = at_safe_point;
551 return (thread->wait_result = THREAD_WAITING);
552 }
553 else
554 if (thread->state & TH_ABORT_SAFELY)
555 thread->state &= ~(TH_ABORT|TH_ABORT_SAFELY);
556
557 return (thread->wait_result = THREAD_INTERRUPTED);
558 }
559
560 /*
561 * Routine: thread_interrupt_level
562 * Purpose:
563 * Set the maximum interruptible state for the
564 * current thread. The effective value of any
565 * interruptible flag passed into assert_wait
566 * will never exceed this.
567 *
568 * Useful for code that must not be interrupted,
569 * but which calls code that doesn't know that.
570 * Returns:
571 * The old interrupt level for the thread.
572 */
573 __private_extern__
574 wait_interrupt_t
575 thread_interrupt_level(
576 wait_interrupt_t new_level)
577 {
578 thread_t thread = current_thread();
579 wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
580
581 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
582
583 return result;
584 }
585
586 /*
587 * Check to see if an assert wait is possible, without actually doing one.
588 * This is used by debug code in locks and elsewhere to verify that it is
589 * always OK to block when trying to take a blocking lock (since waiting
590 * for the actual assert_wait to catch the case may make it hard to detect
591 * this case).
592 */
593 boolean_t
594 assert_wait_possible(void)
595 {
596
597 thread_t thread;
598
599 #if DEBUG
600 if(debug_mode) return TRUE; /* Always succeed in debug mode */
601 #endif
602
603 thread = current_thread();
604
605 return (thread == NULL || wait_queue_assert_possible(thread));
606 }
607
608 /*
609 * assert_wait:
610 *
611 * Assert that the current thread is about to go to
612 * sleep until the specified event occurs.
613 */
614 wait_result_t
615 assert_wait(
616 event_t event,
617 wait_interrupt_t interruptible)
618 {
619 register wait_queue_t wq;
620 register int index;
621
622 assert(event != NO_EVENT);
623
624 index = wait_hash(event);
625 wq = &wait_queues[index];
626 return wait_queue_assert_wait(wq, event, interruptible, 0);
627 }
628
629 wait_result_t
630 assert_wait_timeout(
631 event_t event,
632 wait_interrupt_t interruptible,
633 uint32_t interval,
634 uint32_t scale_factor)
635 {
636 thread_t thread = current_thread();
637 wait_result_t wresult;
638 wait_queue_t wqueue;
639 uint64_t deadline;
640 spl_t s;
641
642 assert(event != NO_EVENT);
643 wqueue = &wait_queues[wait_hash(event)];
644
645 s = splsched();
646 wait_queue_lock(wqueue);
647 thread_lock(thread);
648
649 clock_interval_to_deadline(interval, scale_factor, &deadline);
650 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
651 interruptible, deadline, thread);
652
653 thread_unlock(thread);
654 wait_queue_unlock(wqueue);
655 splx(s);
656
657 return (wresult);
658 }
659
660 wait_result_t
661 assert_wait_deadline(
662 event_t event,
663 wait_interrupt_t interruptible,
664 uint64_t deadline)
665 {
666 thread_t thread = current_thread();
667 wait_result_t wresult;
668 wait_queue_t wqueue;
669 spl_t s;
670
671 assert(event != NO_EVENT);
672 wqueue = &wait_queues[wait_hash(event)];
673
674 s = splsched();
675 wait_queue_lock(wqueue);
676 thread_lock(thread);
677
678 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
679 interruptible, deadline, thread);
680
681 thread_unlock(thread);
682 wait_queue_unlock(wqueue);
683 splx(s);
684
685 return (wresult);
686 }
687
688 /*
689 * thread_sleep_fast_usimple_lock:
690 *
691 * Cause the current thread to wait until the specified event
692 * occurs. The specified simple_lock is unlocked before releasing
693 * the cpu and re-acquired as part of waking up.
694 *
695 * This is the simple lock sleep interface for components that use a
696 * faster version of simple_lock() than is provided by usimple_lock().
697 */
698 __private_extern__ wait_result_t
699 thread_sleep_fast_usimple_lock(
700 event_t event,
701 simple_lock_t lock,
702 wait_interrupt_t interruptible)
703 {
704 wait_result_t res;
705
706 res = assert_wait(event, interruptible);
707 if (res == THREAD_WAITING) {
708 simple_unlock(lock);
709 res = thread_block(THREAD_CONTINUE_NULL);
710 simple_lock(lock);
711 }
712 return res;
713 }
714
715
716 /*
717 * thread_sleep_usimple_lock:
718 *
719 * Cause the current thread to wait until the specified event
720 * occurs. The specified usimple_lock is unlocked before releasing
721 * the cpu and re-acquired as part of waking up.
722 *
723 * This is the simple lock sleep interface for components where
724 * simple_lock() is defined in terms of usimple_lock().
725 */
726 wait_result_t
727 thread_sleep_usimple_lock(
728 event_t event,
729 usimple_lock_t lock,
730 wait_interrupt_t interruptible)
731 {
732 wait_result_t res;
733
734 res = assert_wait(event, interruptible);
735 if (res == THREAD_WAITING) {
736 usimple_unlock(lock);
737 res = thread_block(THREAD_CONTINUE_NULL);
738 usimple_lock(lock);
739 }
740 return res;
741 }
742
743 /*
744 * thread_sleep_mutex:
745 *
746 * Cause the current thread to wait until the specified event
747 * occurs. The specified mutex is unlocked before releasing
748 * the cpu. The mutex will be re-acquired before returning.
749 *
750 * JMM - Add hint to make sure mutex is available before rousting
751 */
752 wait_result_t
753 thread_sleep_mutex(
754 event_t event,
755 mutex_t *mutex,
756 wait_interrupt_t interruptible)
757 {
758 wait_result_t res;
759
760 res = assert_wait(event, interruptible);
761 if (res == THREAD_WAITING) {
762 mutex_unlock(mutex);
763 res = thread_block(THREAD_CONTINUE_NULL);
764 mutex_lock(mutex);
765 }
766 return res;
767 }
768
769 /*
770 * thread_sleep_mutex_deadline:
771 *
772 * Cause the current thread to wait until the specified event
773 * (or deadline) occurs. The specified mutex is unlocked before
774 * releasing the cpu. The mutex will be re-acquired before returning.
775 */
776 wait_result_t
777 thread_sleep_mutex_deadline(
778 event_t event,
779 mutex_t *mutex,
780 uint64_t deadline,
781 wait_interrupt_t interruptible)
782 {
783 wait_result_t res;
784
785 res = assert_wait_deadline(event, interruptible, deadline);
786 if (res == THREAD_WAITING) {
787 mutex_unlock(mutex);
788 res = thread_block(THREAD_CONTINUE_NULL);
789 mutex_lock(mutex);
790 }
791 return res;
792 }
793
794 /*
795 * thread_sleep_lock_write:
796 *
797 * Cause the current thread to wait until the specified event
798 * occurs. The specified (write) lock is unlocked before releasing
799 * the cpu. The (write) lock will be re-acquired before returning.
800 */
801 wait_result_t
802 thread_sleep_lock_write(
803 event_t event,
804 lock_t *lock,
805 wait_interrupt_t interruptible)
806 {
807 wait_result_t res;
808
809 res = assert_wait(event, interruptible);
810 if (res == THREAD_WAITING) {
811 lock_write_done(lock);
812 res = thread_block(THREAD_CONTINUE_NULL);
813 lock_write(lock);
814 }
815 return res;
816 }
817
818 /*
819 * thread_stop:
820 *
821 * Force a preemption point for a thread and wait
822 * for it to stop running. Arbitrates access among
823 * multiple stop requests. (released by unstop)
824 *
825 * The thread must enter a wait state and stop via a
826 * separate means.
827 *
828 * Returns FALSE if interrupted.
829 */
830 boolean_t
831 thread_stop(
832 thread_t thread)
833 {
834 wait_result_t wresult;
835 spl_t s;
836
837 s = splsched();
838 wake_lock(thread);
839
840 while (thread->state & TH_SUSP) {
841 thread->wake_active = TRUE;
842 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
843 wake_unlock(thread);
844 splx(s);
845
846 if (wresult == THREAD_WAITING)
847 wresult = thread_block(THREAD_CONTINUE_NULL);
848
849 if (wresult != THREAD_AWAKENED)
850 return (FALSE);
851
852 s = splsched();
853 wake_lock(thread);
854 }
855
856 thread_lock(thread);
857 thread->state |= TH_SUSP;
858
859 while (thread->state & TH_RUN) {
860 processor_t processor = thread->last_processor;
861
862 if ( processor != PROCESSOR_NULL &&
863 processor->state == PROCESSOR_RUNNING &&
864 processor->active_thread == thread )
865 cause_ast_check(processor);
866 thread_unlock(thread);
867
868 thread->wake_active = TRUE;
869 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
870 wake_unlock(thread);
871 splx(s);
872
873 if (wresult == THREAD_WAITING)
874 wresult = thread_block(THREAD_CONTINUE_NULL);
875
876 if (wresult != THREAD_AWAKENED) {
877 thread_unstop(thread);
878 return (FALSE);
879 }
880
881 s = splsched();
882 wake_lock(thread);
883 thread_lock(thread);
884 }
885
886 thread_unlock(thread);
887 wake_unlock(thread);
888 splx(s);
889
890 return (TRUE);
891 }
892
893 /*
894 * thread_unstop:
895 *
896 * Release a previous stop request and set
897 * the thread running if appropriate.
898 *
899 * Use only after a successful stop operation.
900 */
901 void
902 thread_unstop(
903 thread_t thread)
904 {
905 spl_t s = splsched();
906
907 wake_lock(thread);
908 thread_lock(thread);
909
910 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
911 thread->state &= ~TH_SUSP;
912 thread_unblock(thread, THREAD_AWAKENED);
913
914 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
915 }
916 else
917 if (thread->state & TH_SUSP) {
918 thread->state &= ~TH_SUSP;
919
920 if (thread->wake_active) {
921 thread->wake_active = FALSE;
922 thread_unlock(thread);
923 wake_unlock(thread);
924 splx(s);
925
926 thread_wakeup(&thread->wake_active);
927 return;
928 }
929 }
930
931 thread_unlock(thread);
932 wake_unlock(thread);
933 splx(s);
934 }
935
936 /*
937 * thread_wait:
938 *
939 * Wait for a thread to stop running. (non-interruptible)
940 *
941 */
942 void
943 thread_wait(
944 thread_t thread)
945 {
946 wait_result_t wresult;
947 spl_t s = splsched();
948
949 wake_lock(thread);
950 thread_lock(thread);
951
952 while (thread->state & TH_RUN) {
953 processor_t processor = thread->last_processor;
954
955 if ( processor != PROCESSOR_NULL &&
956 processor->state == PROCESSOR_RUNNING &&
957 processor->active_thread == thread )
958 cause_ast_check(processor);
959 thread_unlock(thread);
960
961 thread->wake_active = TRUE;
962 wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
963 wake_unlock(thread);
964 splx(s);
965
966 if (wresult == THREAD_WAITING)
967 thread_block(THREAD_CONTINUE_NULL);
968
969 s = splsched();
970 wake_lock(thread);
971 thread_lock(thread);
972 }
973
974 thread_unlock(thread);
975 wake_unlock(thread);
976 splx(s);
977 }
978
979 /*
980 * Routine: clear_wait_internal
981 *
982 * Clear the wait condition for the specified thread.
983 * Start the thread executing if that is appropriate.
984 * Arguments:
985 * thread thread to awaken
986 * result Wakeup result the thread should see
987 * Conditions:
988 * At splsched
989 * the thread is locked.
990 * Returns:
991 * KERN_SUCCESS thread was rousted out of a wait
992 * KERN_FAILURE thread was waiting but could not be rousted
993 * KERN_NOT_WAITING thread was not waiting
994 */
995 __private_extern__ kern_return_t
996 clear_wait_internal(
997 thread_t thread,
998 wait_result_t wresult)
999 {
1000 wait_queue_t wq = thread->wait_queue;
1001 int i = LockTimeOut;
1002
1003 do {
1004 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1005 return (KERN_FAILURE);
1006
1007 if (wq != WAIT_QUEUE_NULL) {
1008 if (wait_queue_lock_try(wq)) {
1009 wait_queue_pull_thread_locked(wq, thread, TRUE);
1010 /* wait queue unlocked, thread still locked */
1011 }
1012 else {
1013 thread_unlock(thread);
1014 delay(1);
1015
1016 thread_lock(thread);
1017 if (wq != thread->wait_queue)
1018 return (KERN_NOT_WAITING);
1019
1020 continue;
1021 }
1022 }
1023
1024 return (thread_go(thread, wresult));
1025 } while (--i > 0);
1026
1027 panic("clear_wait_internal: deadlock: thread=0x%x, wq=0x%x, cpu=%d\n",
1028 thread, wq, cpu_number());
1029
1030 return (KERN_FAILURE);
1031 }
1032
1033
1034 /*
1035 * clear_wait:
1036 *
1037 * Clear the wait condition for the specified thread. Start the thread
1038 * executing if that is appropriate.
1039 *
1040 * parameters:
1041 * thread thread to awaken
1042 * result Wakeup result the thread should see
1043 */
1044 kern_return_t
1045 clear_wait(
1046 thread_t thread,
1047 wait_result_t result)
1048 {
1049 kern_return_t ret;
1050 spl_t s;
1051
1052 s = splsched();
1053 thread_lock(thread);
1054 ret = clear_wait_internal(thread, result);
1055 thread_unlock(thread);
1056 splx(s);
1057 return ret;
1058 }
1059
1060
1061 /*
1062 * thread_wakeup_prim:
1063 *
1064 * Common routine for thread_wakeup, thread_wakeup_with_result,
1065 * and thread_wakeup_one.
1066 *
1067 */
1068 kern_return_t
1069 thread_wakeup_prim(
1070 event_t event,
1071 boolean_t one_thread,
1072 wait_result_t result)
1073 {
1074 register wait_queue_t wq;
1075 register int index;
1076
1077 index = wait_hash(event);
1078 wq = &wait_queues[index];
1079 if (one_thread)
1080 return (wait_queue_wakeup_one(wq, event, result));
1081 else
1082 return (wait_queue_wakeup_all(wq, event, result));
1083 }
1084
1085 /*
1086 * thread_bind:
1087 *
1088 * Force a thread to execute on the specified processor.
1089 *
1090 * Returns the previous binding. PROCESSOR_NULL means
1091 * not bound.
1092 *
1093 * XXX - DO NOT export this to users - XXX
1094 */
1095 processor_t
1096 thread_bind(
1097 register thread_t thread,
1098 processor_t processor)
1099 {
1100 processor_t prev;
1101 run_queue_t runq = RUN_QUEUE_NULL;
1102 spl_t s;
1103
1104 s = splsched();
1105 thread_lock(thread);
1106 prev = thread->bound_processor;
1107 if (prev != PROCESSOR_NULL)
1108 runq = run_queue_remove(thread);
1109
1110 thread->bound_processor = processor;
1111
1112 if (runq != RUN_QUEUE_NULL)
1113 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1114 thread_unlock(thread);
1115 splx(s);
1116
1117 return (prev);
1118 }
1119
1120 struct {
1121 uint32_t idle_pset_last,
1122 idle_pset_any,
1123 idle_bound;
1124
1125 uint32_t pset_self,
1126 pset_last,
1127 pset_other,
1128 bound_self,
1129 bound_other;
1130
1131 uint32_t realtime_self,
1132 realtime_last,
1133 realtime_other;
1134
1135 uint32_t missed_realtime,
1136 missed_other;
1137 } dispatch_counts;
1138
1139 /*
1140 * Select a thread for the current processor to run.
1141 *
1142 * May select the current thread, which must be locked.
1143 */
1144 thread_t
1145 thread_select(
1146 register processor_t processor)
1147 {
1148 register thread_t thread;
1149 processor_set_t pset;
1150 boolean_t other_runnable;
1151
1152 /*
1153 * Check for other non-idle runnable threads.
1154 */
1155 pset = processor->processor_set;
1156 thread = processor->active_thread;
1157
1158 /* Update the thread's priority */
1159 if (thread->sched_stamp != sched_tick)
1160 update_priority(thread);
1161
1162 processor->current_pri = thread->sched_pri;
1163
1164 simple_lock(&pset->sched_lock);
1165
1166 other_runnable = processor->runq.count > 0 || pset->runq.count > 0;
1167
1168 if ( thread->state == TH_RUN &&
1169 thread->processor_set == pset &&
1170 (thread->bound_processor == PROCESSOR_NULL ||
1171 thread->bound_processor == processor) ) {
1172 if ( thread->sched_pri >= BASEPRI_RTQUEUES &&
1173 first_timeslice(processor) ) {
1174 if (pset->runq.highq >= BASEPRI_RTQUEUES) {
1175 register run_queue_t runq = &pset->runq;
1176 register queue_t q;
1177
1178 q = runq->queues + runq->highq;
1179 if (((thread_t)q->next)->realtime.deadline <
1180 processor->deadline) {
1181 thread = (thread_t)q->next;
1182 ((queue_entry_t)thread)->next->prev = q;
1183 q->next = ((queue_entry_t)thread)->next;
1184 thread->runq = RUN_QUEUE_NULL;
1185 assert(thread->sched_mode & TH_MODE_PREEMPT);
1186 runq->count--; runq->urgency--;
1187 if (queue_empty(q)) {
1188 if (runq->highq != IDLEPRI)
1189 clrbit(MAXPRI - runq->highq, runq->bitmap);
1190 runq->highq = MAXPRI - ffsbit(runq->bitmap);
1191 }
1192 }
1193 }
1194
1195 processor->deadline = thread->realtime.deadline;
1196
1197 simple_unlock(&pset->sched_lock);
1198
1199 return (thread);
1200 }
1201
1202 if ( (!other_runnable ||
1203 (processor->runq.highq < thread->sched_pri &&
1204 pset->runq.highq < thread->sched_pri)) ) {
1205
1206 /* I am the highest priority runnable (non-idle) thread */
1207
1208 processor->deadline = UINT64_MAX;
1209
1210 simple_unlock(&pset->sched_lock);
1211
1212 return (thread);
1213 }
1214 }
1215
1216 if (other_runnable)
1217 thread = choose_thread(pset, processor);
1218 else {
1219 /*
1220 * Nothing is runnable, so set this processor idle if it
1221 * was running. Return its idle thread.
1222 */
1223 if (processor->state == PROCESSOR_RUNNING) {
1224 remqueue(&pset->active_queue, (queue_entry_t)processor);
1225 processor->state = PROCESSOR_IDLE;
1226
1227 enqueue_tail(&pset->idle_queue, (queue_entry_t)processor);
1228 pset->idle_count++;
1229 }
1230
1231 processor->deadline = UINT64_MAX;
1232
1233 thread = processor->idle_thread;
1234 }
1235
1236 simple_unlock(&pset->sched_lock);
1237
1238 return (thread);
1239 }
1240
1241 /*
1242 * Perform a context switch and start executing the new thread.
1243 *
1244 * Returns FALSE on failure, and the thread is re-dispatched.
1245 *
1246 * Called at splsched.
1247 */
1248
1249 #define funnel_release_check(thread, debug) \
1250 MACRO_BEGIN \
1251 if ((thread)->funnel_state & TH_FN_OWNED) { \
1252 (thread)->funnel_state = TH_FN_REFUNNEL; \
1253 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
1254 (thread)->funnel_lock, (debug), 0, 0, 0); \
1255 funnel_unlock((thread)->funnel_lock); \
1256 } \
1257 MACRO_END
1258
1259 #define funnel_refunnel_check(thread, debug) \
1260 MACRO_BEGIN \
1261 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
1262 kern_return_t result = (thread)->wait_result; \
1263 \
1264 (thread)->funnel_state = 0; \
1265 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
1266 (thread)->funnel_lock, (debug), 0, 0, 0); \
1267 funnel_lock((thread)->funnel_lock); \
1268 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
1269 (thread)->funnel_lock, (debug), 0, 0, 0); \
1270 (thread)->funnel_state = TH_FN_OWNED; \
1271 (thread)->wait_result = result; \
1272 } \
1273 MACRO_END
1274
1275 boolean_t
1276 thread_invoke(
1277 register thread_t old_thread,
1278 register thread_t new_thread,
1279 ast_t reason)
1280 {
1281 thread_continue_t new_cont, continuation = old_thread->continuation;
1282 void *new_param, *parameter = old_thread->parameter;
1283 processor_t processor;
1284 thread_t prev_thread;
1285
1286 if (get_preemption_level() != 0)
1287 panic("thread_invoke: preemption_level %d\n",
1288 get_preemption_level());
1289
1290 assert(old_thread == current_thread());
1291
1292 /*
1293 * Mark thread interruptible.
1294 */
1295 thread_lock(new_thread);
1296 new_thread->state &= ~TH_UNINT;
1297
1298 assert(thread_runnable(new_thread));
1299
1300 /*
1301 * Allow time constraint threads to hang onto
1302 * a stack.
1303 */
1304 if ( (old_thread->sched_mode & TH_MODE_REALTIME) &&
1305 !old_thread->reserved_stack ) {
1306 old_thread->reserved_stack = old_thread->kernel_stack;
1307 }
1308
1309 if (continuation != NULL) {
1310 if (!new_thread->kernel_stack) {
1311 /*
1312 * If the old thread is using a privileged stack,
1313 * check to see whether we can exchange it with
1314 * that of the new thread.
1315 */
1316 if ( old_thread->kernel_stack == old_thread->reserved_stack &&
1317 !new_thread->reserved_stack)
1318 goto need_stack;
1319
1320 /*
1321 * Context switch by performing a stack handoff.
1322 */
1323 new_cont = new_thread->continuation;
1324 new_thread->continuation = NULL;
1325 new_param = new_thread->parameter;
1326 new_thread->parameter = NULL;
1327
1328 processor = current_processor();
1329 processor->active_thread = new_thread;
1330 processor->current_pri = new_thread->sched_pri;
1331 new_thread->last_processor = processor;
1332 ast_context(new_thread);
1333 thread_unlock(new_thread);
1334
1335 current_task()->csw++;
1336
1337 old_thread->reason = reason;
1338
1339 processor->last_dispatch = mach_absolute_time();
1340 timer_event((uint32_t)processor->last_dispatch,
1341 &new_thread->system_timer);
1342
1343 thread_done(old_thread, new_thread, processor);
1344
1345 machine_stack_handoff(old_thread, new_thread);
1346
1347 thread_begin(new_thread, processor);
1348
1349 /*
1350 * Now dispatch the old thread.
1351 */
1352 thread_dispatch(old_thread);
1353
1354 counter_always(c_thread_invoke_hits++);
1355
1356 funnel_refunnel_check(new_thread, 2);
1357 (void) spllo();
1358
1359 assert(new_cont);
1360 call_continuation(new_cont, new_param, new_thread->wait_result);
1361 /*NOTREACHED*/
1362 }
1363 else
1364 if (new_thread == old_thread) {
1365 /* same thread but with continuation */
1366 counter(++c_thread_invoke_same);
1367 thread_unlock(new_thread);
1368
1369 funnel_refunnel_check(new_thread, 3);
1370 (void) spllo();
1371
1372 call_continuation(continuation, parameter, new_thread->wait_result);
1373 /*NOTREACHED*/
1374 }
1375 }
1376 else {
1377 /*
1378 * Check that the new thread has a stack
1379 */
1380 if (!new_thread->kernel_stack) {
1381 need_stack:
1382 if (!stack_alloc_try(new_thread)) {
1383 counter_always(c_thread_invoke_misses++);
1384 thread_unlock(new_thread);
1385 thread_stack_enqueue(new_thread);
1386 return (FALSE);
1387 }
1388 }
1389 else
1390 if (new_thread == old_thread) {
1391 counter(++c_thread_invoke_same);
1392 thread_unlock(new_thread);
1393 return (TRUE);
1394 }
1395 }
1396
1397 /*
1398 * Context switch by full context save.
1399 */
1400 processor = current_processor();
1401 processor->active_thread = new_thread;
1402 processor->current_pri = new_thread->sched_pri;
1403 new_thread->last_processor = processor;
1404 ast_context(new_thread);
1405 assert(thread_runnable(new_thread));
1406 thread_unlock(new_thread);
1407
1408 counter_always(c_thread_invoke_csw++);
1409 current_task()->csw++;
1410
1411 assert(old_thread->runq == RUN_QUEUE_NULL);
1412 old_thread->reason = reason;
1413
1414 processor->last_dispatch = mach_absolute_time();
1415 timer_event((uint32_t)processor->last_dispatch, &new_thread->system_timer);
1416
1417 thread_done(old_thread, new_thread, processor);
1418
1419 /*
1420 * This is where we actually switch register context,
1421 * and address space if required. Control will not
1422 * return here immediately.
1423 */
1424 prev_thread = machine_switch_context(old_thread, continuation, new_thread);
1425
1426 /*
1427 * We are still old_thread, possibly on a different processor,
1428 * and new_thread is now stale.
1429 */
1430 thread_begin(old_thread, old_thread->last_processor);
1431
1432 /*
1433 * Now dispatch the thread which resumed us.
1434 */
1435 thread_dispatch(prev_thread);
1436
1437 if (continuation) {
1438 funnel_refunnel_check(old_thread, 3);
1439 (void) spllo();
1440
1441 call_continuation(continuation, parameter, old_thread->wait_result);
1442 /*NOTREACHED*/
1443 }
1444
1445 return (TRUE);
1446 }
1447
1448 /*
1449 * thread_done:
1450 *
1451 * Perform calculations for thread
1452 * finishing execution on the current processor.
1453 *
1454 * Called at splsched.
1455 */
1456 void
1457 thread_done(
1458 thread_t old_thread,
1459 thread_t new_thread,
1460 processor_t processor)
1461 {
1462 if (!(old_thread->state & TH_IDLE)) {
1463 /*
1464 * Compute remainder of current quantum.
1465 */
1466 if ( first_timeslice(processor) &&
1467 processor->quantum_end > processor->last_dispatch )
1468 old_thread->current_quantum =
1469 (processor->quantum_end - processor->last_dispatch);
1470 else
1471 old_thread->current_quantum = 0;
1472
1473 if (old_thread->sched_mode & TH_MODE_REALTIME) {
1474 /*
1475 * Cancel the deadline if the thread has
1476 * consumed the entire quantum.
1477 */
1478 if (old_thread->current_quantum == 0) {
1479 old_thread->realtime.deadline = UINT64_MAX;
1480 old_thread->reason |= AST_QUANTUM;
1481 }
1482 }
1483 else {
1484 /*
1485 * For non-realtime threads treat a tiny
1486 * remaining quantum as an expired quantum
1487 * but include what's left next time.
1488 */
1489 if (old_thread->current_quantum < min_std_quantum) {
1490 old_thread->reason |= AST_QUANTUM;
1491 old_thread->current_quantum += std_quantum;
1492 }
1493 }
1494
1495 /*
1496 * If we are doing a direct handoff then
1497 * give the remainder of our quantum to
1498 * the next thread.
1499 */
1500 if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
1501 new_thread->current_quantum = old_thread->current_quantum;
1502 old_thread->reason |= AST_QUANTUM;
1503 old_thread->current_quantum = 0;
1504 }
1505
1506 old_thread->last_switch = processor->last_dispatch;
1507
1508 old_thread->computation_metered +=
1509 (old_thread->last_switch - old_thread->computation_epoch);
1510 }
1511 }
1512
1513 /*
1514 * thread_begin:
1515 *
1516 * Set up for thread beginning execution on
1517 * the current processor.
1518 *
1519 * Called at splsched.
1520 */
1521 void
1522 thread_begin(
1523 thread_t thread,
1524 processor_t processor)
1525 {
1526 if (!(thread->state & TH_IDLE)) {
1527 /*
1528 * Give the thread a new quantum
1529 * if none remaining.
1530 */
1531 if (thread->current_quantum == 0)
1532 thread_quantum_init(thread);
1533
1534 /*
1535 * Set up quantum timer and timeslice.
1536 */
1537 processor->quantum_end =
1538 (processor->last_dispatch + thread->current_quantum);
1539 timer_call_enter1(&processor->quantum_timer,
1540 thread, processor->quantum_end);
1541
1542 processor_timeslice_setup(processor, thread);
1543
1544 thread->last_switch = processor->last_dispatch;
1545
1546 thread->computation_epoch = thread->last_switch;
1547 }
1548 else {
1549 timer_call_cancel(&processor->quantum_timer);
1550 processor->timeslice = 1;
1551 }
1552 }
1553
1554 /*
1555 * thread_dispatch:
1556 *
1557 * Handle previous thread at context switch. Re-dispatch
1558 * if still running, otherwise update run state and perform
1559 * special actions.
1560 *
1561 * Called at splsched.
1562 */
1563 void
1564 thread_dispatch(
1565 register thread_t thread)
1566 {
1567 /*
1568 * If blocked at a continuation, discard
1569 * the stack.
1570 */
1571 if (thread->continuation != NULL && thread->kernel_stack)
1572 stack_free(thread);
1573
1574 if (!(thread->state & TH_IDLE)) {
1575 wake_lock(thread);
1576 thread_lock(thread);
1577
1578 if (!(thread->state & TH_WAIT)) {
1579 /*
1580 * Still running.
1581 */
1582 if (thread->reason & AST_QUANTUM)
1583 thread_setrun(thread, SCHED_TAILQ);
1584 else
1585 if (thread->reason & AST_PREEMPT)
1586 thread_setrun(thread, SCHED_HEADQ);
1587 else
1588 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1589
1590 thread->reason = AST_NONE;
1591
1592 thread_unlock(thread);
1593 wake_unlock(thread);
1594 }
1595 else {
1596 boolean_t wake;
1597
1598 /*
1599 * Waiting.
1600 */
1601 thread->state &= ~TH_RUN;
1602
1603 wake = thread->wake_active;
1604 thread->wake_active = FALSE;
1605
1606 if (thread->sched_mode & TH_MODE_TIMESHARE)
1607 pset_share_decr(thread->processor_set);
1608 pset_run_decr(thread->processor_set);
1609
1610 thread_unlock(thread);
1611 wake_unlock(thread);
1612
1613 if (thread->options & TH_OPT_CALLOUT)
1614 call_thread_block();
1615
1616 if (wake)
1617 thread_wakeup((event_t)&thread->wake_active);
1618
1619 if (thread->state & TH_TERMINATE)
1620 thread_terminate_enqueue(thread);
1621 }
1622 }
1623 }
1624
1625 /*
1626 * thread_block_reason:
1627 *
1628 * Forces a reschedule, blocking the caller if a wait
1629 * has been asserted.
1630 *
1631 * If a continuation is specified, then thread_invoke will
1632 * attempt to discard the thread's kernel stack. When the
1633 * thread resumes, it will execute the continuation function
1634 * on a new kernel stack.
1635 */
1636 counter(mach_counter_t c_thread_block_calls = 0;)
1637
1638 wait_result_t
1639 thread_block_reason(
1640 thread_continue_t continuation,
1641 void *parameter,
1642 ast_t reason)
1643 {
1644 register thread_t self = current_thread();
1645 register processor_t processor;
1646 register thread_t new_thread;
1647 spl_t s;
1648
1649 counter(++c_thread_block_calls);
1650
1651 s = splsched();
1652
1653 #if 0
1654 #if MACH_KDB
1655 {
1656 extern void db_chkpmgr(void);
1657 db_chkpmgr(); /* (BRINGUP) See if pm config changed */
1658
1659 }
1660 #endif
1661 #endif
1662
1663 if (!(reason & AST_PREEMPT))
1664 funnel_release_check(self, 2);
1665
1666 processor = current_processor();
1667
1668 /*
1669 * Delay switching to the idle thread under certain conditions.
1670 */
1671 if (s != FALSE && (self->state & (TH_IDLE|TH_TERMINATE|TH_WAIT)) == TH_WAIT) {
1672 if ( processor->processor_set->processor_count > 1 &&
1673 processor->processor_set->runq.count == 0 &&
1674 processor->runq.count == 0 )
1675 processor = delay_idle(processor, self);
1676 }
1677
1678 /* If we're explicitly yielding, force a subsequent quantum */
1679 if (reason & AST_YIELD)
1680 processor->timeslice = 0;
1681
1682 /* We're handling all scheduling AST's */
1683 ast_off(AST_SCHEDULING);
1684
1685 self->continuation = continuation;
1686 self->parameter = parameter;
1687
1688 thread_lock(self);
1689 new_thread = thread_select(processor);
1690 assert(new_thread && thread_runnable(new_thread));
1691 thread_unlock(self);
1692 while (!thread_invoke(self, new_thread, reason)) {
1693 thread_lock(self);
1694 new_thread = thread_select(processor);
1695 assert(new_thread && thread_runnable(new_thread));
1696 thread_unlock(self);
1697 }
1698
1699 funnel_refunnel_check(self, 5);
1700 splx(s);
1701
1702 return (self->wait_result);
1703 }
1704
1705 /*
1706 * thread_block:
1707 *
1708 * Block the current thread if a wait has been asserted.
1709 */
1710 wait_result_t
1711 thread_block(
1712 thread_continue_t continuation)
1713 {
1714 return thread_block_reason(continuation, NULL, AST_NONE);
1715 }
1716
1717 wait_result_t
1718 thread_block_parameter(
1719 thread_continue_t continuation,
1720 void *parameter)
1721 {
1722 return thread_block_reason(continuation, parameter, AST_NONE);
1723 }
1724
1725 /*
1726 * thread_run:
1727 *
1728 * Switch directly from the current thread to the
1729 * new thread, handing off our quantum if appropriate.
1730 *
1731 * New thread must be runnable, and not on a run queue.
1732 *
1733 * Called at splsched.
1734 */
1735 int
1736 thread_run(
1737 thread_t self,
1738 thread_continue_t continuation,
1739 void *parameter,
1740 thread_t new_thread)
1741 {
1742 ast_t handoff = AST_HANDOFF;
1743
1744 funnel_release_check(self, 3);
1745
1746 self->continuation = continuation;
1747 self->parameter = parameter;
1748
1749 while (!thread_invoke(self, new_thread, handoff)) {
1750 register processor_t processor = current_processor();
1751
1752 thread_lock(self);
1753 new_thread = thread_select(processor);
1754 thread_unlock(self);
1755 handoff = AST_NONE;
1756 }
1757
1758 funnel_refunnel_check(self, 6);
1759
1760 return (self->wait_result);
1761 }
1762
1763 /*
1764 * thread_continue:
1765 *
1766 * Called at splsched when a thread first receives
1767 * a new stack after a continuation.
1768 */
1769 void
1770 thread_continue(
1771 register thread_t old_thread)
1772 {
1773 register thread_t self = current_thread();
1774 register thread_continue_t continuation;
1775 register void *parameter;
1776
1777 continuation = self->continuation;
1778 self->continuation = NULL;
1779 parameter = self->parameter;
1780 self->parameter = NULL;
1781
1782 thread_begin(self, self->last_processor);
1783
1784 if (old_thread != THREAD_NULL)
1785 thread_dispatch(old_thread);
1786
1787 funnel_refunnel_check(self, 4);
1788
1789 if (old_thread != THREAD_NULL)
1790 (void)spllo();
1791
1792 call_continuation(continuation, parameter, self->wait_result);
1793 /*NOTREACHED*/
1794 }
1795
1796 /*
1797 * Enqueue thread on run queue. Thread must be locked,
1798 * and not already be on a run queue. Returns TRUE
1799 * if a preemption is indicated based on the state
1800 * of the run queue.
1801 *
1802 * Run queue must be locked, see run_queue_remove()
1803 * for more info.
1804 */
1805 static boolean_t
1806 run_queue_enqueue(
1807 register run_queue_t rq,
1808 register thread_t thread,
1809 integer_t options)
1810 {
1811 register int whichq = thread->sched_pri;
1812 register queue_t queue = &rq->queues[whichq];
1813 boolean_t result = FALSE;
1814
1815 assert(whichq >= MINPRI && whichq <= MAXPRI);
1816
1817 assert(thread->runq == RUN_QUEUE_NULL);
1818 if (queue_empty(queue)) {
1819 enqueue_tail(queue, (queue_entry_t)thread);
1820
1821 setbit(MAXPRI - whichq, rq->bitmap);
1822 if (whichq > rq->highq) {
1823 rq->highq = whichq;
1824 result = TRUE;
1825 }
1826 }
1827 else
1828 if (options & SCHED_HEADQ)
1829 enqueue_head(queue, (queue_entry_t)thread);
1830 else
1831 enqueue_tail(queue, (queue_entry_t)thread);
1832
1833 thread->runq = rq;
1834 if (thread->sched_mode & TH_MODE_PREEMPT)
1835 rq->urgency++;
1836 rq->count++;
1837
1838 return (result);
1839 }
1840
1841 /*
1842 * Enqueue a thread for realtime execution, similar
1843 * to above. Handles preemption directly.
1844 */
1845 static void
1846 realtime_schedule_insert(
1847 register processor_set_t pset,
1848 register thread_t thread)
1849 {
1850 register run_queue_t rq = &pset->runq;
1851 register int whichq = thread->sched_pri;
1852 register queue_t queue = &rq->queues[whichq];
1853 uint64_t deadline = thread->realtime.deadline;
1854 boolean_t try_preempt = FALSE;
1855
1856 assert(whichq >= BASEPRI_REALTIME && whichq <= MAXPRI);
1857
1858 assert(thread->runq == RUN_QUEUE_NULL);
1859 if (queue_empty(queue)) {
1860 enqueue_tail(queue, (queue_entry_t)thread);
1861
1862 setbit(MAXPRI - whichq, rq->bitmap);
1863 if (whichq > rq->highq)
1864 rq->highq = whichq;
1865 try_preempt = TRUE;
1866 }
1867 else {
1868 register thread_t entry = (thread_t)queue_first(queue);
1869
1870 while (TRUE) {
1871 if ( queue_end(queue, (queue_entry_t)entry) ||
1872 deadline < entry->realtime.deadline ) {
1873 entry = (thread_t)queue_prev((queue_entry_t)entry);
1874 break;
1875 }
1876
1877 entry = (thread_t)queue_next((queue_entry_t)entry);
1878 }
1879
1880 if ((queue_entry_t)entry == queue)
1881 try_preempt = TRUE;
1882
1883 insque((queue_entry_t)thread, (queue_entry_t)entry);
1884 }
1885
1886 thread->runq = rq;
1887 assert(thread->sched_mode & TH_MODE_PREEMPT);
1888 rq->count++; rq->urgency++;
1889
1890 if (try_preempt) {
1891 register processor_t processor;
1892
1893 processor = current_processor();
1894 if ( pset == processor->processor_set &&
1895 (thread->sched_pri > processor->current_pri ||
1896 deadline < processor->deadline ) ) {
1897 dispatch_counts.realtime_self++;
1898 simple_unlock(&pset->sched_lock);
1899
1900 ast_on(AST_PREEMPT | AST_URGENT);
1901 return;
1902 }
1903
1904 if ( pset->processor_count > 1 ||
1905 pset != processor->processor_set ) {
1906 processor_t myprocessor, lastprocessor;
1907 queue_entry_t next;
1908
1909 myprocessor = processor;
1910 processor = thread->last_processor;
1911 if ( processor != myprocessor &&
1912 processor != PROCESSOR_NULL &&
1913 processor->processor_set == pset &&
1914 processor->state == PROCESSOR_RUNNING &&
1915 (thread->sched_pri > processor->current_pri ||
1916 deadline < processor->deadline ) ) {
1917 dispatch_counts.realtime_last++;
1918 cause_ast_check(processor);
1919 simple_unlock(&pset->sched_lock);
1920 return;
1921 }
1922
1923 lastprocessor = processor;
1924 queue = &pset->active_queue;
1925 processor = (processor_t)queue_first(queue);
1926 while (!queue_end(queue, (queue_entry_t)processor)) {
1927 next = queue_next((queue_entry_t)processor);
1928
1929 if ( processor != myprocessor &&
1930 processor != lastprocessor &&
1931 (thread->sched_pri > processor->current_pri ||
1932 deadline < processor->deadline ) ) {
1933 if (!queue_end(queue, next)) {
1934 remqueue(queue, (queue_entry_t)processor);
1935 enqueue_tail(queue, (queue_entry_t)processor);
1936 }
1937 dispatch_counts.realtime_other++;
1938 cause_ast_check(processor);
1939 simple_unlock(&pset->sched_lock);
1940 return;
1941 }
1942
1943 processor = (processor_t)next;
1944 }
1945 }
1946 }
1947
1948 simple_unlock(&pset->sched_lock);
1949 }
1950
1951 /*
1952 * thread_setrun:
1953 *
1954 * Dispatch thread for execution, directly onto an idle
1955 * processor if possible. Else put on appropriate run
1956 * queue. (local if bound, else processor set)
1957 *
1958 * Thread must be locked.
1959 */
1960 void
1961 thread_setrun(
1962 register thread_t new_thread,
1963 integer_t options)
1964 {
1965 register processor_t processor;
1966 register processor_set_t pset;
1967 register thread_t thread;
1968 ast_t preempt = (options & SCHED_PREEMPT)?
1969 AST_PREEMPT: AST_NONE;
1970
1971 assert(thread_runnable(new_thread));
1972
1973 /*
1974 * Update priority if needed.
1975 */
1976 if (new_thread->sched_stamp != sched_tick)
1977 update_priority(new_thread);
1978
1979 /*
1980 * Check for urgent preemption.
1981 */
1982 if (new_thread->sched_mode & TH_MODE_PREEMPT)
1983 preempt = (AST_PREEMPT | AST_URGENT);
1984
1985 assert(new_thread->runq == RUN_QUEUE_NULL);
1986
1987 if ((processor = new_thread->bound_processor) == PROCESSOR_NULL) {
1988 /*
1989 * First try to dispatch on
1990 * the last processor.
1991 */
1992 pset = new_thread->processor_set;
1993 processor = new_thread->last_processor;
1994 if ( pset->processor_count > 1 &&
1995 processor != PROCESSOR_NULL &&
1996 processor->state == PROCESSOR_IDLE ) {
1997 processor_lock(processor);
1998 simple_lock(&pset->sched_lock);
1999 if ( processor->processor_set == pset &&
2000 processor->state == PROCESSOR_IDLE ) {
2001 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2002 pset->idle_count--;
2003 processor->next_thread = new_thread;
2004 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2005 processor->deadline = new_thread->realtime.deadline;
2006 else
2007 processor->deadline = UINT64_MAX;
2008 processor->state = PROCESSOR_DISPATCHING;
2009 dispatch_counts.idle_pset_last++;
2010 simple_unlock(&pset->sched_lock);
2011 processor_unlock(processor);
2012 if (processor != current_processor())
2013 machine_signal_idle(processor);
2014 return;
2015 }
2016 processor_unlock(processor);
2017 }
2018 else
2019 simple_lock(&pset->sched_lock);
2020
2021 /*
2022 * Next pick any idle processor
2023 * in the processor set.
2024 */
2025 if (pset->idle_count > 0) {
2026 processor = (processor_t)dequeue_head(&pset->idle_queue);
2027 pset->idle_count--;
2028 processor->next_thread = new_thread;
2029 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2030 processor->deadline = new_thread->realtime.deadline;
2031 else
2032 processor->deadline = UINT64_MAX;
2033 processor->state = PROCESSOR_DISPATCHING;
2034 dispatch_counts.idle_pset_any++;
2035 simple_unlock(&pset->sched_lock);
2036 if (processor != current_processor())
2037 machine_signal_idle(processor);
2038 return;
2039 }
2040
2041 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2042 realtime_schedule_insert(pset, new_thread);
2043 else {
2044 if (!run_queue_enqueue(&pset->runq, new_thread, options))
2045 preempt = AST_NONE;
2046
2047 /*
2048 * Update the timesharing quanta.
2049 */
2050 timeshare_quanta_update(pset);
2051
2052 /*
2053 * Preempt check.
2054 */
2055 if (preempt != AST_NONE) {
2056 /*
2057 * First try the current processor
2058 * if it is a member of the correct
2059 * processor set.
2060 */
2061 processor = current_processor();
2062 thread = processor->active_thread;
2063 if ( pset == processor->processor_set &&
2064 csw_needed(thread, processor) ) {
2065 dispatch_counts.pset_self++;
2066 simple_unlock(&pset->sched_lock);
2067
2068 ast_on(preempt);
2069 return;
2070 }
2071
2072 /*
2073 * If that failed and we have other
2074 * processors available keep trying.
2075 */
2076 if ( pset->processor_count > 1 ||
2077 pset != processor->processor_set ) {
2078 queue_t queue = &pset->active_queue;
2079 processor_t myprocessor, lastprocessor;
2080 queue_entry_t next;
2081
2082 /*
2083 * Next try the last processor
2084 * dispatched on.
2085 */
2086 myprocessor = processor;
2087 processor = new_thread->last_processor;
2088 if ( processor != myprocessor &&
2089 processor != PROCESSOR_NULL &&
2090 processor->processor_set == pset &&
2091 processor->state == PROCESSOR_RUNNING &&
2092 new_thread->sched_pri > processor->current_pri ) {
2093 dispatch_counts.pset_last++;
2094 cause_ast_check(processor);
2095 simple_unlock(&pset->sched_lock);
2096 return;
2097 }
2098
2099 /*
2100 * Lastly, pick any other
2101 * available processor.
2102 */
2103 lastprocessor = processor;
2104 processor = (processor_t)queue_first(queue);
2105 while (!queue_end(queue, (queue_entry_t)processor)) {
2106 next = queue_next((queue_entry_t)processor);
2107
2108 if ( processor != myprocessor &&
2109 processor != lastprocessor &&
2110 new_thread->sched_pri >
2111 processor->current_pri ) {
2112 if (!queue_end(queue, next)) {
2113 remqueue(queue, (queue_entry_t)processor);
2114 enqueue_tail(queue, (queue_entry_t)processor);
2115 }
2116 dispatch_counts.pset_other++;
2117 cause_ast_check(processor);
2118 simple_unlock(&pset->sched_lock);
2119 return;
2120 }
2121
2122 processor = (processor_t)next;
2123 }
2124 }
2125 }
2126
2127 simple_unlock(&pset->sched_lock);
2128 }
2129 }
2130 else {
2131 /*
2132 * Bound, can only run on bound processor. Have to lock
2133 * processor here because it may not be the current one.
2134 */
2135 processor_lock(processor);
2136 pset = processor->processor_set;
2137 if (pset != PROCESSOR_SET_NULL) {
2138 simple_lock(&pset->sched_lock);
2139 if (processor->state == PROCESSOR_IDLE) {
2140 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2141 pset->idle_count--;
2142 processor->next_thread = new_thread;
2143 processor->deadline = UINT64_MAX;
2144 processor->state = PROCESSOR_DISPATCHING;
2145 dispatch_counts.idle_bound++;
2146 simple_unlock(&pset->sched_lock);
2147 processor_unlock(processor);
2148 if (processor != current_processor())
2149 machine_signal_idle(processor);
2150 return;
2151 }
2152 }
2153
2154 if (!run_queue_enqueue(&processor->runq, new_thread, options))
2155 preempt = AST_NONE;
2156
2157 if (preempt != AST_NONE) {
2158 if (processor == current_processor()) {
2159 thread = processor->active_thread;
2160 if (csw_needed(thread, processor)) {
2161 dispatch_counts.bound_self++;
2162 ast_on(preempt);
2163 }
2164 }
2165 else
2166 if ( processor->state == PROCESSOR_RUNNING &&
2167 new_thread->sched_pri > processor->current_pri ) {
2168 dispatch_counts.bound_other++;
2169 cause_ast_check(processor);
2170 }
2171 }
2172
2173 if (pset != PROCESSOR_SET_NULL)
2174 simple_unlock(&pset->sched_lock);
2175
2176 processor_unlock(processor);
2177 }
2178 }
2179
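/*
 * The dispatch_counts fields bumped above classify how the thread was
 * placed: the idle_* counters cover handing the thread directly to an
 * idle processor, the *_self counters cover marking the current
 * processor for preemption with ast_on(), and pset_last, pset_other and
 * bound_other cover poking a remote running processor with
 * cause_ast_check().
 */
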
2180 /*
2181 * Check for a possible preemption point in
2182 * the (current) thread.
2183 *
2184 * Called at splsched.
2185 */
2186 ast_t
2187 csw_check(
2188 thread_t thread,
2189 processor_t processor)
2190 {
2191 int current_pri = thread->sched_pri;
2192 ast_t result = AST_NONE;
2193 run_queue_t runq;
2194
2195 if (first_timeslice(processor)) {
2196 runq = &processor->processor_set->runq;
2197 if (runq->highq >= BASEPRI_RTQUEUES)
2198 return (AST_PREEMPT | AST_URGENT);
2199
2200 if (runq->highq > current_pri) {
2201 if (runq->urgency > 0)
2202 return (AST_PREEMPT | AST_URGENT);
2203
2204 result |= AST_PREEMPT;
2205 }
2206
2207 runq = &processor->runq;
2208 if (runq->highq > current_pri) {
2209 if (runq->urgency > 0)
2210 return (AST_PREEMPT | AST_URGENT);
2211
2212 result |= AST_PREEMPT;
2213 }
2214 }
2215 else {
2216 runq = &processor->processor_set->runq;
2217 if (runq->highq >= current_pri) {
2218 if (runq->urgency > 0)
2219 return (AST_PREEMPT | AST_URGENT);
2220
2221 result |= AST_PREEMPT;
2222 }
2223
2224 runq = &processor->runq;
2225 if (runq->highq >= current_pri) {
2226 if (runq->urgency > 0)
2227 return (AST_PREEMPT | AST_URGENT);
2228
2229 result |= AST_PREEMPT;
2230 }
2231 }
2232
2233 if (result != AST_NONE)
2234 return (result);
2235
2236 if (thread->state & TH_SUSP)
2237 result |= AST_PREEMPT;
2238
2239 return (result);
2240 }
2241
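/*
 * Summary of the policy above: while the current thread is still in its
 * first timeslice, preemption requires a strictly higher-priority waiting
 * thread (a realtime thread at or above BASEPRI_RTQUEUES on the pset run
 * queue always preempts); once the first timeslice has expired, a thread
 * of equal priority is also sufficient.  AST_URGENT is added whenever the
 * winning run queue has a non-zero urgency count, and a suspended
 * (TH_SUSP) current thread yields AST_PREEMPT even when nothing is
 * waiting.
 */
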
2242 /*
2243 * set_sched_pri:
2244 *
2245 * Set the scheduled priority of the specified thread.
2246 *
2247 * This may cause the thread to change queues.
2248 *
2249 * Thread must be locked.
2250 */
2251 void
2252 set_sched_pri(
2253 thread_t thread,
2254 int priority)
2255 {
2256 register struct run_queue *rq = run_queue_remove(thread);
2257
2258 if ( !(thread->sched_mode & TH_MODE_TIMESHARE) &&
2259 (priority >= BASEPRI_PREEMPT ||
2260 (thread->task_priority < MINPRI_KERNEL &&
2261 thread->task_priority >= BASEPRI_BACKGROUND &&
2262 priority > thread->task_priority) ) )
2263 thread->sched_mode |= TH_MODE_PREEMPT;
2264 else
2265 thread->sched_mode &= ~TH_MODE_PREEMPT;
2266
2267 thread->sched_pri = priority;
2268 if (rq != RUN_QUEUE_NULL)
2269 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2270 else
2271 if (thread->state & TH_RUN) {
2272 processor_t processor = thread->last_processor;
2273
2274 if (thread == current_thread()) {
2275 ast_t preempt = csw_check(thread, processor);
2276
2277 if (preempt != AST_NONE)
2278 ast_on(preempt);
2279 processor->current_pri = priority;
2280 }
2281 else
2282 if ( processor != PROCESSOR_NULL &&
2283 processor->active_thread == thread )
2284 cause_ast_check(processor);
2285 }
2286 }
2287
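/*
 * Hypothetical caller sketch (illustrative only; the variable names are
 * assumptions, but the locking rules follow the comments above -- thread
 * locked, interrupts at splsched for the scheduling simple locks):
 *
 *	spl_t s = splsched();
 *
 *	thread_lock(thread);
 *	set_sched_pri(thread, priority);
 *	thread_unlock(thread);
 *	splx(s);
 */
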
2288 #if 0
2289
2290 static void
2291 run_queue_check(
2292 run_queue_t rq,
2293 thread_t thread)
2294 {
2295 queue_t q;
2296 queue_entry_t qe;
2297
2298 if (rq != thread->runq)
2299 panic("run_queue_check: thread runq");
2300
2301 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
2302 panic("run_queue_check: thread sched_pri");
2303
2304 q = &rq->queues[thread->sched_pri];
2305 qe = queue_first(q);
2306 while (!queue_end(q, qe)) {
2307 if (qe == (queue_entry_t)thread)
2308 return;
2309
2310 qe = queue_next(qe);
2311 }
2312
2313 panic("run_queue_check: end");
2314 }
2315
2316 #endif /* 0 */
2317
2318 /*
2319 * run_queue_remove:
2320 *
2321 * Remove a thread from its current run queue and
2322 * return the run queue if successful.
2323 *
2324 * Thread must be locked.
2325 */
2326 run_queue_t
2327 run_queue_remove(
2328 thread_t thread)
2329 {
2330 register run_queue_t rq = thread->runq;
2331
2332 /*
2333 * If rq is RUN_QUEUE_NULL, the thread will stay out of the
2334 * run queues because the caller locked the thread. Otherwise
2335 * the thread is on a run queue, but it could still be chosen for
2336 * dispatch and removed before we acquire the run queue lock.
2337 */
2338 if (rq != RUN_QUEUE_NULL) {
2339 processor_set_t pset = thread->processor_set;
2340 processor_t processor = thread->bound_processor;
2341
2342 /*
2343 * The run queues are locked by the pset scheduling
2344 * lock, except that when a processor is off-line its
2345 * local run queue is locked by the processor lock.
2346 */
2347 if (processor != PROCESSOR_NULL) {
2348 processor_lock(processor);
2349 pset = processor->processor_set;
2350 }
2351
2352 if (pset != PROCESSOR_SET_NULL)
2353 simple_lock(&pset->sched_lock);
2354
2355 if (rq == thread->runq) {
2356 /*
2357 * Thread is on a run queue and we have a lock on
2358 * that run queue.
2359 */
2360 remqueue(&rq->queues[0], (queue_entry_t)thread);
2361 rq->count--;
2362 if (thread->sched_mode & TH_MODE_PREEMPT)
2363 rq->urgency--;
2364 assert(rq->urgency >= 0);
2365
2366 if (queue_empty(rq->queues + thread->sched_pri)) {
2367 /* update run queue status */
2368 if (thread->sched_pri != IDLEPRI)
2369 clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
2370 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2371 }
2372
2373 thread->runq = RUN_QUEUE_NULL;
2374 }
2375 else {
2376 /*
2377 * The thread left the run queue before we could
2378 * lock the run queue.
2379 */
2380 assert(thread->runq == RUN_QUEUE_NULL);
2381 rq = RUN_QUEUE_NULL;
2382 }
2383
2384 if (pset != PROCESSOR_SET_NULL)
2385 simple_unlock(&pset->sched_lock);
2386
2387 if (processor != PROCESSOR_NULL)
2388 processor_unlock(processor);
2389 }
2390
2391 return (rq);
2392 }
2393
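/*
 * Note that rq == thread->runq is re-tested once the appropriate lock is
 * held: the thread may have been chosen for dispatch and pulled off the
 * queue in the window before the lock was acquired, in which case
 * RUN_QUEUE_NULL is returned.  Callers such as set_sched_pri() treat a
 * RUN_QUEUE_NULL result as "not currently enqueued".
 */
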
2394 /*
2395 * choose_thread:
2396 *
2397 * Choose the next thread to execute, remove it from
2398 * the run queues, and return it.
2399 *
2400 * Called with pset scheduling lock held.
2401 */
2402 static thread_t
2403 choose_thread(
2404 processor_set_t pset,
2405 processor_t processor)
2406 {
2407 register run_queue_t runq;
2408 register thread_t thread;
2409 register queue_t q;
2410
2411 runq = &processor->runq;
2412
2413 if (runq->count > 0 && runq->highq >= pset->runq.highq) {
2414 q = runq->queues + runq->highq;
2415
2416 thread = (thread_t)q->next;
2417 ((queue_entry_t)thread)->next->prev = q;
2418 q->next = ((queue_entry_t)thread)->next;
2419 thread->runq = RUN_QUEUE_NULL;
2420 runq->count--;
2421 if (thread->sched_mode & TH_MODE_PREEMPT)
2422 runq->urgency--;
2423 assert(runq->urgency >= 0);
2424 if (queue_empty(q)) {
2425 if (runq->highq != IDLEPRI)
2426 clrbit(MAXPRI - runq->highq, runq->bitmap);
2427 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2428 }
2429
2430 processor->deadline = UINT64_MAX;
2431
2432 return (thread);
2433 }
2434
2435 runq = &pset->runq;
2436
2437 assert(runq->count > 0);
2438 q = runq->queues + runq->highq;
2439
2440 thread = (thread_t)q->next;
2441 ((queue_entry_t)thread)->next->prev = q;
2442 q->next = ((queue_entry_t)thread)->next;
2443 thread->runq = RUN_QUEUE_NULL;
2444 runq->count--;
2445 if (runq->highq >= BASEPRI_RTQUEUES)
2446 processor->deadline = thread->realtime.deadline;
2447 else
2448 processor->deadline = UINT64_MAX;
2449 if (thread->sched_mode & TH_MODE_PREEMPT)
2450 runq->urgency--;
2451 assert(runq->urgency >= 0);
2452 if (queue_empty(q)) {
2453 if (runq->highq != IDLEPRI)
2454 clrbit(MAXPRI - runq->highq, runq->bitmap);
2455 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2456 }
2457
2458 timeshare_quanta_update(pset);
2459
2460 return (thread);
2461 }
2462
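/*
 * choose_thread() prefers the processor-local run queue whenever its
 * highest occupied priority is at least that of the shared pset queue.
 * The inline dequeue mirrors run_queue_remove(): count and, for
 * TH_MODE_PREEMPT threads, urgency are decremented, the priority bitmap
 * and highq are refreshed when a queue empties, and the processor
 * deadline is set from the thread's realtime deadline when a realtime
 * thread is taken from the pset queue, or reset to UINT64_MAX otherwise.
 */
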
2463 static processor_t
2464 delay_idle(
2465 processor_t processor,
2466 thread_t self)
2467 {
2468 int *gcount, *lcount;
2469 uint64_t abstime, spin, limit;
2470
2471 lcount = &processor->runq.count;
2472 gcount = &processor->processor_set->runq.count;
2473
2474 abstime = mach_absolute_time();
2475 limit = abstime + delay_idle_limit;
2476 spin = abstime + delay_idle_spin;
2477
2478 timer_event((uint32_t)abstime, &processor->idle_thread->system_timer);
2479
2480 self->options |= TH_OPT_DELAYIDLE;
2481
2482 while ( *gcount == 0 && *lcount == 0 &&
2483 (self->state & TH_WAIT) != 0 &&
2484 abstime < limit ) {
2485 if (abstime >= spin) {
2486 (void)spllo();
2487
2488 (void)splsched();
2489 processor = current_processor();
2490 lcount = &processor->runq.count;
2491 gcount = &processor->processor_set->runq.count;
2492
2493 abstime = mach_absolute_time();
2494 spin = abstime + delay_idle_spin;
2495
2496 timer_event((uint32_t)abstime, &processor->idle_thread->system_timer);
2497 }
2498 else {
2499 cpu_pause();
2500 abstime = mach_absolute_time();
2501 }
2502 }
2503
2504 timer_event((uint32_t)abstime, &self->system_timer);
2505
2506 self->options &= ~TH_OPT_DELAYIDLE;
2507
2508 return (processor);
2509 }
2510
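/*
 * delay_idle() busy-waits with cpu_pause() for up to delay_idle_limit,
 * dropping to spllo() and back every delay_idle_spin interval, presumably
 * so that pending interrupts can be taken.  The wait ends early as soon
 * as either run queue becomes non-empty or the waiting thread leaves
 * TH_WAIT.
 */
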
2511 /*
2512 * no_dispatch_count counts number of times processors go non-idle
2513 * without being dispatched. This should be very rare.
2514 */
2515 int no_dispatch_count = 0;
2516
2517 /*
2518 * This is the idle processor thread, which just looks for other threads
2519 * to execute.
2520 */
2521 void
2522 idle_thread(void)
2523 {
2524 register processor_t processor;
2525 register thread_t *threadp;
2526 register int *gcount;
2527 register int *lcount;
2528 register thread_t new_thread;
2529 register int state;
2530 register processor_set_t pset;
2531 ast_t *myast = ast_pending();
2532
2533 processor = current_processor();
2534
2535 threadp = &processor->next_thread;
2536 lcount = &processor->runq.count;
2537 gcount = &processor->processor_set->runq.count;
2538
2539
2540 (void)splsched(); /* Turn interruptions off */
2541
2542 pmsDown(); /* Step power down. Note: interruptions must be disabled for this call */
2543
2544 while ( (*threadp == THREAD_NULL) &&
2545 (*gcount == 0) && (*lcount == 0) ) {
2546
2547 /* check for ASTs while we wait */
2548 if (*myast &~ (AST_SCHEDULING | AST_BSD)) {
2549 /* pending ASTs are not meant for the idle thread; clear them */
2550 *myast &= AST_NONE;
2551 (void)spllo();
2552 }
2553 else
2554 machine_idle();
2555
2556 (void)splsched();
2557 }
2558
2559 /*
2560 * This is an if/else chain rather than a switch statement
2561 * to avoid the bounds-checking code in the common case.
2562 */
2563 pset = processor->processor_set;
2564 simple_lock(&pset->sched_lock);
2565
2566 pmsStep(0); /* Step up out of idle power, may start timer for next step */
2567
2568 state = processor->state;
2569 if (state == PROCESSOR_DISPATCHING) {
2570 /*
2571 * Common case -- cpu dispatched.
2572 */
2573 new_thread = *threadp;
2574 *threadp = (volatile thread_t) THREAD_NULL;
2575 processor->state = PROCESSOR_RUNNING;
2576 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2577
2578 if ( pset->runq.highq >= BASEPRI_RTQUEUES &&
2579 new_thread->sched_pri >= BASEPRI_RTQUEUES ) {
2580 register run_queue_t runq = &pset->runq;
2581 register queue_t q;
2582
2583 q = runq->queues + runq->highq;
2584 if (((thread_t)q->next)->realtime.deadline <
2585 processor->deadline) {
2586 thread_t thread = new_thread;
2587
2588 new_thread = (thread_t)q->next;
2589 ((queue_entry_t)new_thread)->next->prev = q;
2590 q->next = ((queue_entry_t)new_thread)->next;
2591 new_thread->runq = RUN_QUEUE_NULL;
2592 processor->deadline = new_thread->realtime.deadline;
2593 assert(new_thread->sched_mode & TH_MODE_PREEMPT);
2594 runq->count--; runq->urgency--;
2595 if (queue_empty(q)) {
2596 if (runq->highq != IDLEPRI)
2597 clrbit(MAXPRI - runq->highq, runq->bitmap);
2598 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2599 }
2600 dispatch_counts.missed_realtime++;
2601 simple_unlock(&pset->sched_lock);
2602
2603 thread_lock(thread);
2604 thread_setrun(thread, SCHED_HEADQ);
2605 thread_unlock(thread);
2606
2607 counter(c_idle_thread_handoff++);
2608 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2609 /*NOTREACHED*/
2610 }
2611 simple_unlock(&pset->sched_lock);
2612
2613 counter(c_idle_thread_handoff++);
2614 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2615 /*NOTREACHED*/
2616 }
2617
2618 if ( processor->runq.highq > new_thread->sched_pri ||
2619 pset->runq.highq > new_thread->sched_pri ) {
2620 thread_t thread = new_thread;
2621
2622 new_thread = choose_thread(pset, processor);
2623 dispatch_counts.missed_other++;
2624 simple_unlock(&pset->sched_lock);
2625
2626 thread_lock(thread);
2627 thread_setrun(thread, SCHED_HEADQ);
2628 thread_unlock(thread);
2629
2630 counter(c_idle_thread_handoff++);
2631 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2632 /* NOTREACHED */
2633 }
2634 else {
2635 simple_unlock(&pset->sched_lock);
2636
2637 counter(c_idle_thread_handoff++);
2638 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2639 /* NOTREACHED */
2640 }
2641 }
2642 else
2643 if (state == PROCESSOR_IDLE) {
2644 /*
2645 * Processor was not dispatched (rare).
2646 * Set it running again and force a
2647 * reschedule.
2648 */
2649 no_dispatch_count++;
2650 pset->idle_count--;
2651 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2652 processor->state = PROCESSOR_RUNNING;
2653 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2654 simple_unlock(&pset->sched_lock);
2655
2656 counter(c_idle_thread_block++);
2657 thread_block((thread_continue_t)idle_thread);
2658 /* NOTREACHED */
2659 }
2660 else
2661 if (state == PROCESSOR_SHUTDOWN) {
2662 /*
2663 * Going off-line. Force a
2664 * reschedule.
2665 */
2666 if ((new_thread = (thread_t)*threadp) != THREAD_NULL) {
2667 *threadp = (volatile thread_t) THREAD_NULL;
2668 processor->deadline = UINT64_MAX;
2669 simple_unlock(&pset->sched_lock);
2670
2671 thread_lock(new_thread);
2672 thread_setrun(new_thread, SCHED_HEADQ);
2673 thread_unlock(new_thread);
2674 }
2675 else
2676 simple_unlock(&pset->sched_lock);
2677
2678 counter(c_idle_thread_block++);
2679 thread_block((thread_continue_t)idle_thread);
2680 /* NOTREACHED */
2681 }
2682
2683 simple_unlock(&pset->sched_lock);
2684
2685 panic("idle_thread: state %d\n", processor->state);
2686 /*NOTREACHED*/
2687 }
2688
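/*
 * On wakeup the idle thread handles three processor states:
 * PROCESSOR_DISPATCHING (the common case) runs the handed-off thread,
 * first checking whether the run queues now hold an earlier-deadline
 * realtime thread or a higher-priority thread that should run instead;
 * PROCESSOR_IDLE (rare) returns the processor to the active queue and
 * blocks to force a reschedule; PROCESSOR_SHUTDOWN pushes any handed-off
 * thread back onto the run queues before blocking, since the processor
 * is going off-line.  Any other state is a panic.
 */
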
2689 kern_return_t
2690 idle_thread_create(
2691 processor_t processor)
2692 {
2693 kern_return_t result;
2694 thread_t thread;
2695 spl_t s;
2696
2697 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
2698 if (result != KERN_SUCCESS)
2699 return (result);
2700
2701 s = splsched();
2702 thread_lock(thread);
2703 thread->bound_processor = processor;
2704 processor->idle_thread = thread;
2705 thread->sched_pri = thread->priority = IDLEPRI;
2706 thread->state = (TH_RUN | TH_IDLE);
2707 thread_unlock(thread);
2708 splx(s);
2709
2710 thread_deallocate(thread);
2711
2712 return (KERN_SUCCESS);
2713 }
2714
2715 static uint64_t sched_tick_deadline;
2716
2717 /*
2718 * sched_startup:
2719 *
2720 * Kicks off scheduler services.
2721 *
2722 * Called at splsched.
2723 */
2724 void
2725 sched_startup(void)
2726 {
2727 kern_return_t result;
2728 thread_t thread;
2729
2730 result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
2731 if (result != KERN_SUCCESS)
2732 panic("sched_startup");
2733
2734 thread_deallocate(thread);
2735
2736 /*
2737 * Yield to the sched_tick_thread while it times
2738 * a series of context switches back. It stores
2739 * the baseline value in sched_cswtime.
2740 *
2741 * The current thread is the only other thread
2742 * active at this point.
2743 */
2744 while (sched_cswtime == 0)
2745 thread_block(THREAD_CONTINUE_NULL);
2746
2747 thread_daemon_init();
2748
2749 thread_call_initialize();
2750 }
2751
2752 /*
2753 * sched_tick_thread:
2754 *
2755 * Perform periodic bookkeeping functions about ten
2756 * times per second.
2757 */
2758 static void
2759 sched_tick_continue(void)
2760 {
2761 uint64_t abstime = mach_absolute_time();
2762
2763 sched_tick++;
2764
2765 /*
2766 * Compute various averages.
2767 */
2768 compute_averages();
2769
2770 /*
2771 * Scan the run queues for threads which
2772 * may need to be updated.
2773 */
2774 thread_update_scan();
2775
2776 clock_deadline_for_periodic_event(sched_tick_interval, abstime,
2777 &sched_tick_deadline);
2778
2779 assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
2780 thread_block((thread_continue_t)sched_tick_continue);
2781 /*NOTREACHED*/
2782 }
2783
2784 /*
2785 * Time a series of context switches to determine
2786 * a baseline.  Toss the high and low samples and return
2787 * the one-way (per-switch) value.
2788 */
2789 static uint32_t
2790 time_cswitch(void)
2791 {
2792 uint32_t new, hi, low, accum;
2793 uint64_t abstime;
2794 int i, tries = 7;
2795
2796 accum = hi = low = 0;
2797 for (i = 0; i < tries; ++i) {
2798 abstime = mach_absolute_time();
2799 thread_block(THREAD_CONTINUE_NULL);
2800
2801 new = mach_absolute_time() - abstime;
2802
2803 if (i == 0)
2804 accum = hi = low = new;
2805 else {
2806 if (new < low)
2807 low = new;
2808 else
2809 if (new > hi)
2810 hi = new;
2811 accum += new;
2812 }
2813 }
2814
2815 return ((accum - hi - low) / (2 * (tries - 2)));
2816 }
2817
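/*
 * With tries == 7 the loop above gathers seven round-trip samples (the
 * first seeds accum, hi and low).  Discarding the high and low leaves
 * tries - 2 == 5 samples, so dividing by 2 * (tries - 2) == 10 both
 * averages the remainder and halves each round trip into the one-way
 * estimate that sched_tick_thread() stores in sched_cswtime.
 */
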
2818 void
2819 sched_tick_thread(void)
2820 {
2821 sched_cswtime = time_cswitch();
2822
2823 sched_tick_deadline = mach_absolute_time();
2824
2825 sched_tick_continue();
2826 /*NOTREACHED*/
2827 }
2828
2829 /*
2830 * thread_update_scan / runq_scan:
2831 *
2832 * Scan the run queues to account for timesharing threads
2833 * which need to be updated.
2834 *
2835 * Scanner runs in two passes. Pass one squirrels likely
2836 * threads away in an array, pass two does the update.
2837 *
2838 * This is necessary because the run queue is locked for
2839 * the candidate scan, but the thread is locked for the update.
2840 *
2841 * The array should be sized to make forward progress without
2842 * disabling preemption for long periods.
2843 */
2844
2845 #define THREAD_UPDATE_SIZE 128
2846
2847 static thread_t thread_update_array[THREAD_UPDATE_SIZE];
2848 static int thread_update_count = 0;
2849
2850 /*
2851 * Scan a runq for candidate threads.
2852 *
2853 * Returns TRUE if retry is needed.
2854 */
2855 static boolean_t
2856 runq_scan(
2857 run_queue_t runq)
2858 {
2859 register int count;
2860 register queue_t q;
2861 register thread_t thread;
2862
2863 if ((count = runq->count) > 0) {
2864 q = runq->queues + runq->highq;
2865 while (count > 0) {
2866 queue_iterate(q, thread, thread_t, links) {
2867 if ( thread->sched_stamp != sched_tick &&
2868 (thread->sched_mode & TH_MODE_TIMESHARE) ) {
2869 if (thread_update_count == THREAD_UPDATE_SIZE)
2870 return (TRUE);
2871
2872 thread_update_array[thread_update_count++] = thread;
2873 thread_reference_internal(thread);
2874 }
2875
2876 count--;
2877 }
2878
2879 q--;
2880 }
2881 }
2882
2883 return (FALSE);
2884 }
2885
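/*
 * runq_scan() starts at the highest occupied priority queue and walks
 * downward (q--), visiting runq->count threads in all.  Timesharing
 * threads whose sched_stamp has fallen behind sched_tick are stashed in
 * thread_update_array with a reference held; returning TRUE tells the
 * caller to drain the array and retry because it filled up.
 */
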
2886 static void
2887 thread_update_scan(void)
2888 {
2889 register boolean_t restart_needed;
2890 register processor_set_t pset = &default_pset;
2891 register processor_t processor;
2892 register thread_t thread;
2893 spl_t s;
2894
2895 do {
2896 s = splsched();
2897 simple_lock(&pset->sched_lock);
2898 restart_needed = runq_scan(&pset->runq);
2899 simple_unlock(&pset->sched_lock);
2900
2901 if (!restart_needed) {
2902 simple_lock(&pset->sched_lock);
2903 processor = (processor_t)queue_first(&pset->processors);
2904 while (!queue_end(&pset->processors, (queue_entry_t)processor)) {
2905 if ((restart_needed = runq_scan(&processor->runq)) != 0)
2906 break;
2907
2908 thread = processor->idle_thread;
2909 if (thread->sched_stamp != sched_tick) {
2910 if (thread_update_count == THREAD_UPDATE_SIZE) {
2911 restart_needed = TRUE;
2912 break;
2913 }
2914
2915 thread_update_array[thread_update_count++] = thread;
2916 thread_reference_internal(thread);
2917 }
2918
2919 processor = (processor_t)queue_next(&processor->processors);
2920 }
2921 simple_unlock(&pset->sched_lock);
2922 }
2923 splx(s);
2924
2925 /*
2926 * Ok, we now have a collection of candidates -- fix them.
2927 */
2928 while (thread_update_count > 0) {
2929 thread = thread_update_array[--thread_update_count];
2930 thread_update_array[thread_update_count] = THREAD_NULL;
2931
2932 s = splsched();
2933 thread_lock(thread);
2934 if ( !(thread->state & (TH_WAIT|TH_SUSP)) &&
2935 thread->sched_stamp != sched_tick )
2936 update_priority(thread);
2937 thread_unlock(thread);
2938 splx(s);
2939
2940 thread_deallocate(thread);
2941 }
2942 } while (restart_needed);
2943 }
2944
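/*
 * Pass two above drains thread_update_array: each candidate is locked at
 * splsched, update_priority() is applied only if the thread is neither
 * waiting nor suspended and its sched_stamp is still stale, and the
 * reference taken during the scan is dropped with thread_deallocate().
 */
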
2945 /*
2946 * Just in case someone doesn't use the macro
2947 */
2948 #undef thread_wakeup
2949 void
2950 thread_wakeup(
2951 event_t x);
2952
2953 void
2954 thread_wakeup(
2955 event_t x)
2956 {
2957 thread_wakeup_with_result(x, THREAD_AWAKENED);
2958 }
2959
2960 boolean_t
2961 preemption_enabled(void)
2962 {
2963 return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
2964 }
2965
2966 #if DEBUG
2967 static boolean_t
2968 thread_runnable(
2969 thread_t thread)
2970 {
2971 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
2972 }
2973 #endif /* DEBUG */
2974
2975 #if MACH_KDB
2976 #include <ddb/db_output.h>
2977 #define printf kdbprintf
2978 void db_sched(void);
2979
2980 void
2981 db_sched(void)
2982 {
2983 iprintf("Scheduling Statistics:\n");
2984 db_indent += 2;
2985 iprintf("Thread invocations: csw %d same %d\n",
2986 c_thread_invoke_csw, c_thread_invoke_same);
2987 #if MACH_COUNTERS
2988 iprintf("Thread block: calls %d\n",
2989 c_thread_block_calls);
2990 iprintf("Idle thread:\n\thandoff %d block %d no_dispatch %d\n",
2991 c_idle_thread_handoff,
2992 c_idle_thread_block, no_dispatch_count);
2993 iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
2994 #endif /* MACH_COUNTERS */
2995 db_indent -= 2;
2996 }
2997
2998 #include <ddb/db_output.h>
2999 void db_show_thread_log(void);
3000
3001 void
3002 db_show_thread_log(void)
3003 {
3004 }
3005 #endif /* MACH_KDB */