apple/xnu xnu-792.21.3: osfmk/kern/sched_prim.c
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: sched_prim.c
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1986
62 *
63 * Scheduling primitives
64 *
65 */
66
67 #include <debug.h>
68 #include <mach_kdb.h>
69
70 #include <ddb/db_output.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/machine.h>
74 #include <mach/policy.h>
75 #include <mach/sync_policy.h>
76
77 #include <machine/machine_routines.h>
78 #include <machine/sched_param.h>
79
80 #include <kern/kern_types.h>
81 #include <kern/clock.h>
82 #include <kern/counters.h>
83 #include <kern/cpu_number.h>
84 #include <kern/cpu_data.h>
85 #include <kern/debug.h>
86 #include <kern/lock.h>
87 #include <kern/macro_help.h>
88 #include <kern/machine.h>
89 #include <kern/misc_protos.h>
90 #include <kern/processor.h>
91 #include <kern/queue.h>
92 #include <kern/sched.h>
93 #include <kern/sched_prim.h>
94 #include <kern/syscall_subr.h>
95 #include <kern/task.h>
96 #include <kern/thread.h>
97 #include <kern/wait_queue.h>
98
99 #include <vm/pmap.h>
100 #include <vm/vm_kern.h>
101 #include <vm/vm_map.h>
102
103 #include <sys/kdebug.h>
104
105 #ifdef __ppc__
106 #include <ppc/pms.h>
107 #endif
108
109 #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
110 int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
111
112 #define MAX_UNSAFE_QUANTA 800
113 int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
114
115 #define MAX_POLL_QUANTA 2
116 int max_poll_quanta = MAX_POLL_QUANTA;
117
118 #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
119 int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
120
121 uint64_t max_unsafe_computation;
122 uint32_t sched_safe_duration;
123 uint64_t max_poll_computation;
124
125 uint32_t std_quantum;
126 uint32_t min_std_quantum;
127
128 uint32_t std_quantum_us;
129
130 uint32_t max_rt_quantum;
131 uint32_t min_rt_quantum;
132
133 uint32_t sched_cswtime;
134
135 static uint32_t delay_idle_limit, delay_idle_spin;
136 static processor_t delay_idle(
137 processor_t processor,
138 thread_t self);
139
140 unsigned sched_tick;
141 uint32_t sched_tick_interval;
142
143 uint32_t sched_pri_shift;
144
145 /* Forwards */
146 void wait_queues_init(void);
147
148 static void load_shift_init(void);
149
150 static thread_t choose_thread(
151 processor_set_t pset,
152 processor_t processor);
153
154 static void thread_update_scan(void);
155
156 #if DEBUG
157 static
158 boolean_t thread_runnable(
159 thread_t thread);
160
161 #endif /*DEBUG*/
162
163
164 /*
165 * State machine
166 *
167 * states are combinations of:
168 * R running
169 * W waiting (or on wait queue)
170 * N non-interruptible
171 * O swapped out
172 * I being swapped in
173 *
174 * init action
175 * assert_wait thread_block clear_wait swapout swapin
176 *
177 * R RW, RWN R; setrun - -
178 * RN RWN RN; setrun - -
179 *
180 * RW W R -
181 * RWN WN RN -
182 *
183 * W R; setrun WO
184 * WN RN; setrun -
185 *
186 * RO - - R
187 *
188 */
189
190 /*
191 * Waiting protocols and implementation:
192 *
193 * Each thread may be waiting for exactly one event; this event
194 * is set using assert_wait(). That thread may be awakened either
195 * by performing a thread_wakeup_prim() on its event,
196 * or by directly waking that thread up with clear_wait().
197 *
198 * The implementation of wait events uses a hash table. Each
199 * bucket is a queue of threads having the same hash function
200 * value; the chain for the queue (linked list) is the run queue
201 * field. [It is not possible to be waiting and runnable at the
202 * same time.]
203 *
204 * Locks on both the thread and on the hash buckets govern the
205 * wait event field and the queue chain field. Because wakeup
206 * operations only have the event as an argument, the event hash
207 * bucket must be locked before any thread.
208 *
209 * Scheduling operations may also occur at interrupt level; therefore,
210 * interrupts below splsched() must be prevented when holding
211 * thread or hash bucket locks.
212 *
213 * The wait event hash table declarations are as follows:
214 */
215
216 #define NUMQUEUES 59
217
218 struct wait_queue wait_queues[NUMQUEUES];
219
220 #define wait_hash(event) \
221 ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
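/*
 * Illustrative sketch (annotation, not part of the original file): the
 * canonical wait/wakeup pattern built on these primitives.  The object,
 * its lock()/unlock() and its "ready" field are hypothetical.
 *
 *	lock(object);
 *	while (!object->ready) {
 *		assert_wait((event_t)&object->ready, THREAD_UNINT);
 *		unlock(object);
 *		thread_block(THREAD_CONTINUE_NULL);	[sleeps until a wakeup is posted]
 *		lock(object);
 *	}
 *	unlock(object);
 *
 * and, elsewhere, after setting object->ready:
 *
 *	thread_wakeup((event_t)&object->ready);	[wakes all waiters hashed to this event]
 */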
222
223 int8_t sched_load_shifts[NRQS];
224
225 void
226 sched_init(void)
227 {
228 /*
229 * Calculate the timeslicing quantum
230 * in us.
231 */
232 if (default_preemption_rate < 1)
233 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
234 std_quantum_us = (1000 * 1000) / default_preemption_rate;
235
236 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
237
238 sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
239 (1 << SCHED_TICK_SHIFT);
240
241 wait_queues_init();
242 load_shift_init();
243 pset_init(&default_pset);
244 sched_tick = 0;
245 ast_init();
246 }
247
248 void
249 sched_timebase_init(void)
250 {
251 uint64_t abstime;
252 uint32_t shift;
253
254 /* standard timeslicing quantum */
255 clock_interval_to_absolutetime_interval(
256 std_quantum_us, NSEC_PER_USEC, &abstime);
257 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
258 std_quantum = abstime;
259
260 /* smallest remaining quantum (250 us) */
261 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
262 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
263 min_std_quantum = abstime;
264
265 /* smallest rt computation (50 us) */
266 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
267 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
268 min_rt_quantum = abstime;
269
270 /* maximum rt computation (50 ms) */
271 clock_interval_to_absolutetime_interval(
272 50, 1000*NSEC_PER_USEC, &abstime);
273 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
274 max_rt_quantum = abstime;
275
276 /* scheduler tick interval */
277 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
278 NSEC_PER_USEC, &abstime);
279 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
280 sched_tick_interval = abstime;
281
282 /*
283 * Compute conversion factor from usage to
284 * timesharing priorities with 5/8 ** n aging.
285 */
286 abstime = (abstime * 5) / 3;
287 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
288 abstime >>= 1;
289 sched_pri_shift = shift;
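
/*
 * Worked example (annotation, not part of the original file), assuming a
 * timebase where one absolute-time unit is 1 ns, SCHED_TICK_SHIFT == 3
 * (a 125 ms scheduler tick) and BASEPRI_DEFAULT == 31:
 *
 *	sched_tick_interval ~= 125,000,000
 *	abstime = 125,000,000 * 5 / 3 ~= 208,333,333
 *	halved 23 times -> ~24 <= BASEPRI_DEFAULT, so sched_pri_shift = 23
 *
 * i.e. roughly 2^23 abstime units (~8 ms) of metered usage per priority
 * point; the per-load component from sched_load_shifts[] is applied
 * elsewhere.
 */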
290
291 max_unsafe_computation = max_unsafe_quanta * std_quantum;
292 max_poll_computation = max_poll_quanta * std_quantum;
293
294 /* delay idle constant(s) (60, 1 us) */
295 clock_interval_to_absolutetime_interval(60, NSEC_PER_USEC, &abstime);
296 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
297 delay_idle_limit = abstime;
298
299 clock_interval_to_absolutetime_interval(1, NSEC_PER_USEC, &abstime);
300 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
301 delay_idle_spin = abstime;
302 }
303
304 void
305 wait_queues_init(void)
306 {
307 register int i;
308
309 for (i = 0; i < NUMQUEUES; i++) {
310 wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
311 }
312 }
313
314 /*
315 * Set up values for timeshare
316 * loading factors.
317 */
318 static void
319 load_shift_init(void)
320 {
321 int8_t k, *p = sched_load_shifts;
322 uint32_t i, j;
323
324 *p++ = INT8_MIN; *p++ = 0;
325
326 for (i = j = 2, k = 1; i < NRQS; ++k) {
327 for (j <<= 1; i < j; ++i)
328 *p++ = k;
329 }
330 }
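
/*
 * Annotation (not part of the original file): the loop above fills
 * sched_load_shifts[i] with floor(log2(i)) for i >= 2, so the table
 * begins INT8_MIN, 0, 1, 1, 2, 2, 2, 2, 3, ...  It supplies the
 * per-load component used when aging timeshare usage against the
 * processor set's runnable load.
 */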
331
332 /*
333 * Thread wait timer expiration.
334 */
335 void
336 thread_timer_expire(
337 void *p0,
338 __unused void *p1)
339 {
340 thread_t thread = p0;
341 spl_t s;
342
343 s = splsched();
344 thread_lock(thread);
345 if (--thread->wait_timer_active == 0) {
346 if (thread->wait_timer_is_set) {
347 thread->wait_timer_is_set = FALSE;
348 clear_wait_internal(thread, THREAD_TIMED_OUT);
349 }
350 }
351 thread_unlock(thread);
352 splx(s);
353 }
354
355 /*
356 * thread_set_timer:
357 *
358 * Set a timer for the current thread, if the thread
359 * is ready to wait. Must be called between assert_wait()
360 * and thread_block().
361 */
362 void
363 thread_set_timer(
364 uint32_t interval,
365 uint32_t scale_factor)
366 {
367 thread_t thread = current_thread();
368 uint64_t deadline;
369 spl_t s;
370
371 s = splsched();
372 thread_lock(thread);
373 if ((thread->state & TH_WAIT) != 0) {
374 clock_interval_to_deadline(interval, scale_factor, &deadline);
375 if (!timer_call_enter(&thread->wait_timer, deadline))
376 thread->wait_timer_active++;
377 thread->wait_timer_is_set = TRUE;
378 }
379 thread_unlock(thread);
380 splx(s);
381 }
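
/*
 * Illustrative sketch (annotation, not part of the original file): a
 * bounded wait using the protocol above.  The event name is hypothetical;
 * the scale factor is in nanoseconds per interval unit, as with
 * clock_interval_to_deadline().
 *
 *	assert_wait((event_t)&object->ready, THREAD_UNINT);
 *	thread_set_timer(100, 1000 * NSEC_PER_USEC);	[100 ms timeout]
 *	wresult = thread_block(THREAD_CONTINUE_NULL);
 *	if (wresult == THREAD_TIMED_OUT)
 *		...the timer fired and woke us via clear_wait_internal()...
 *	else
 *		thread_cancel_timer();	[harmless here: a normal wakeup already
 *					 cancels the timer in thread_unblock()]
 */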
382
383 void
384 thread_set_timer_deadline(
385 uint64_t deadline)
386 {
387 thread_t thread = current_thread();
388 spl_t s;
389
390 s = splsched();
391 thread_lock(thread);
392 if ((thread->state & TH_WAIT) != 0) {
393 if (!timer_call_enter(&thread->wait_timer, deadline))
394 thread->wait_timer_active++;
395 thread->wait_timer_is_set = TRUE;
396 }
397 thread_unlock(thread);
398 splx(s);
399 }
400
401 void
402 thread_cancel_timer(void)
403 {
404 thread_t thread = current_thread();
405 spl_t s;
406
407 s = splsched();
408 thread_lock(thread);
409 if (thread->wait_timer_is_set) {
410 if (timer_call_cancel(&thread->wait_timer))
411 thread->wait_timer_active--;
412 thread->wait_timer_is_set = FALSE;
413 }
414 thread_unlock(thread);
415 splx(s);
416 }
417
418 /*
419 * thread_unblock:
420 *
421 * Unblock thread on wake up.
422 *
423 * Returns TRUE if the thread is still running.
424 *
425 * Thread must be locked.
426 */
427 boolean_t
428 thread_unblock(
429 thread_t thread,
430 wait_result_t wresult)
431 {
432 boolean_t result = FALSE;
433
434 /*
435 * Set wait_result.
436 */
437 thread->wait_result = wresult;
438
439 /*
440 * Cancel pending wait timer.
441 */
442 if (thread->wait_timer_is_set) {
443 if (timer_call_cancel(&thread->wait_timer))
444 thread->wait_timer_active--;
445 thread->wait_timer_is_set = FALSE;
446 }
447
448 /*
449 * Update scheduling state.
450 */
451 thread->state &= ~(TH_WAIT|TH_UNINT);
452
453 if (!(thread->state & TH_RUN)) {
454 thread->state |= TH_RUN;
455
456 /*
456 * Mark unblocked if the thread is a callout thread.
458 */
459 if (thread->options & TH_OPT_CALLOUT)
460 call_thread_unblock();
461
462 /*
463 * Update pset run counts.
464 */
465 pset_run_incr(thread->processor_set);
466 if (thread->sched_mode & TH_MODE_TIMESHARE)
467 pset_share_incr(thread->processor_set);
468 }
469 else
470 result = TRUE;
471
472 /*
473 * Calculate deadline for real-time threads.
474 */
475 if (thread->sched_mode & TH_MODE_REALTIME) {
476 thread->realtime.deadline = mach_absolute_time();
477 thread->realtime.deadline += thread->realtime.constraint;
478 }
479
480 /*
481 * Clear old quantum, fail-safe computation, etc.
482 */
483 thread->current_quantum = 0;
484 thread->computation_metered = 0;
485 thread->reason = AST_NONE;
486
487 KERNEL_DEBUG_CONSTANT(
488 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
489 (int)thread, (int)thread->sched_pri, 0, 0, 0);
490
491 return (result);
492 }
493
494 /*
495 * Routine: thread_go
496 * Purpose:
497 * Unblock and dispatch thread.
498 * Conditions:
499 * thread lock held, IPC locks may be held.
500 * thread must have been pulled from wait queue under same lock hold.
501 * Returns:
502 * KERN_SUCCESS - Thread was set running
503 * KERN_NOT_WAITING - Thread was not waiting
504 */
505 kern_return_t
506 thread_go(
507 thread_t thread,
508 wait_result_t wresult)
509 {
510 assert(thread->at_safe_point == FALSE);
511 assert(thread->wait_event == NO_EVENT64);
512 assert(thread->wait_queue == WAIT_QUEUE_NULL);
513
514 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
515 if (!thread_unblock(thread, wresult))
516 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
517
518 return (KERN_SUCCESS);
519 }
520
521 return (KERN_NOT_WAITING);
522 }
523
524 /*
525 * Routine: thread_mark_wait_locked
526 * Purpose:
527 * Mark a thread as waiting. If, given the circumstances,
528 * it doesn't want to wait (i.e. already aborted), then
529 * indicate that in the return value.
530 * Conditions:
531 * at splsched() and thread is locked.
532 */
533 __private_extern__
534 wait_result_t
535 thread_mark_wait_locked(
536 thread_t thread,
537 wait_interrupt_t interruptible)
538 {
539 boolean_t at_safe_point;
540
541 /*
542 * The thread may have certain types of interrupts/aborts masked
543 * off. Even if the wait location says these types of interrupts
544 * are OK, we have to honor mask settings (outer-scoped code may
545 * not be able to handle aborts at the moment).
546 */
547 if (interruptible > (thread->options & TH_OPT_INTMASK))
548 interruptible = thread->options & TH_OPT_INTMASK;
549
550 at_safe_point = (interruptible == THREAD_ABORTSAFE);
551
552 if ( interruptible == THREAD_UNINT ||
553 !(thread->state & TH_ABORT) ||
554 (!at_safe_point &&
555 (thread->state & TH_ABORT_SAFELY)) ) {
556 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
557 thread->at_safe_point = at_safe_point;
558 return (thread->wait_result = THREAD_WAITING);
559 }
560 else
561 if (thread->state & TH_ABORT_SAFELY)
562 thread->state &= ~(TH_ABORT|TH_ABORT_SAFELY);
563
564 return (thread->wait_result = THREAD_INTERRUPTED);
565 }
566
567 /*
568 * Routine: thread_interrupt_level
569 * Purpose:
570 * Set the maximum interruptible state for the
571 * current thread. The effective value of any
572 * interruptible flag passed into assert_wait
573 * will never exceed this.
574 *
575 * Useful for code that must not be interrupted,
576 * but which calls code that doesn't know that.
577 * Returns:
578 * The old interrupt level for the thread.
579 */
580 __private_extern__
581 wait_interrupt_t
582 thread_interrupt_level(
583 wait_interrupt_t new_level)
584 {
585 thread_t thread = current_thread();
586 wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
587
588 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
589
590 return result;
591 }
592
593 /*
594 * Check to see if an assert wait is possible, without actually doing one.
595 * This is used by debug code in locks and elsewhere to verify that it is
596 * always OK to block when trying to take a blocking lock (since waiting
597 * for the actual assert_wait to catch the case may make it hard to detect
598 * this case).
599 */
600 boolean_t
601 assert_wait_possible(void)
602 {
603
604 thread_t thread;
605
606 #if DEBUG
607 if(debug_mode) return TRUE; /* Always succeed in debug mode */
608 #endif
609
610 thread = current_thread();
611
612 return (thread == NULL || wait_queue_assert_possible(thread));
613 }
614
615 /*
616 * assert_wait:
617 *
618 * Assert that the current thread is about to go to
619 * sleep until the specified event occurs.
620 */
621 wait_result_t
622 assert_wait(
623 event_t event,
624 wait_interrupt_t interruptible)
625 {
626 register wait_queue_t wq;
627 register int index;
628
629 assert(event != NO_EVENT);
630
631 index = wait_hash(event);
632 wq = &wait_queues[index];
633 return wait_queue_assert_wait(wq, event, interruptible, 0);
634 }
635
636 wait_result_t
637 assert_wait_timeout(
638 event_t event,
639 wait_interrupt_t interruptible,
640 uint32_t interval,
641 uint32_t scale_factor)
642 {
643 thread_t thread = current_thread();
644 wait_result_t wresult;
645 wait_queue_t wqueue;
646 uint64_t deadline;
647 spl_t s;
648
649 assert(event != NO_EVENT);
650 wqueue = &wait_queues[wait_hash(event)];
651
652 s = splsched();
653 wait_queue_lock(wqueue);
654 thread_lock(thread);
655
656 clock_interval_to_deadline(interval, scale_factor, &deadline);
657 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
658 interruptible, deadline, thread);
659
660 thread_unlock(thread);
661 wait_queue_unlock(wqueue);
662 splx(s);
663
664 return (wresult);
665 }
666
667 wait_result_t
668 assert_wait_deadline(
669 event_t event,
670 wait_interrupt_t interruptible,
671 uint64_t deadline)
672 {
673 thread_t thread = current_thread();
674 wait_result_t wresult;
675 wait_queue_t wqueue;
676 spl_t s;
677
678 assert(event != NO_EVENT);
679 wqueue = &wait_queues[wait_hash(event)];
680
681 s = splsched();
682 wait_queue_lock(wqueue);
683 thread_lock(thread);
684
685 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
686 interruptible, deadline, thread);
687
688 thread_unlock(thread);
689 wait_queue_unlock(wqueue);
690 splx(s);
691
692 return (wresult);
693 }
694
695 /*
696 * thread_sleep_fast_usimple_lock:
697 *
698 * Cause the current thread to wait until the specified event
699 * occurs. The specified simple_lock is unlocked before releasing
700 * the cpu and re-acquired as part of waking up.
701 *
702 * This is the simple lock sleep interface for components that use a
703 * faster version of simple_lock() than is provided by usimple_lock().
704 */
705 __private_extern__ wait_result_t
706 thread_sleep_fast_usimple_lock(
707 event_t event,
708 simple_lock_t lock,
709 wait_interrupt_t interruptible)
710 {
711 wait_result_t res;
712
713 res = assert_wait(event, interruptible);
714 if (res == THREAD_WAITING) {
715 simple_unlock(lock);
716 res = thread_block(THREAD_CONTINUE_NULL);
717 simple_lock(lock);
718 }
719 return res;
720 }
721
722
723 /*
724 * thread_sleep_usimple_lock:
725 *
726 * Cause the current thread to wait until the specified event
727 * occurs. The specified usimple_lock is unlocked before releasing
728 * the cpu and re-acquired as part of waking up.
729 *
730 * This is the simple lock sleep interface for components where
731 * simple_lock() is defined in terms of usimple_lock().
732 */
733 wait_result_t
734 thread_sleep_usimple_lock(
735 event_t event,
736 usimple_lock_t lock,
737 wait_interrupt_t interruptible)
738 {
739 wait_result_t res;
740
741 res = assert_wait(event, interruptible);
742 if (res == THREAD_WAITING) {
743 usimple_unlock(lock);
744 res = thread_block(THREAD_CONTINUE_NULL);
745 usimple_lock(lock);
746 }
747 return res;
748 }
749
750 /*
751 * thread_sleep_mutex:
752 *
753 * Cause the current thread to wait until the specified event
754 * occurs. The specified mutex is unlocked before releasing
755 * the cpu. The mutex will be re-acquired before returning.
756 *
757 * JMM - Add hint to make sure mutex is available before rousting
758 */
759 wait_result_t
760 thread_sleep_mutex(
761 event_t event,
762 mutex_t *mutex,
763 wait_interrupt_t interruptible)
764 {
765 wait_result_t res;
766
767 res = assert_wait(event, interruptible);
768 if (res == THREAD_WAITING) {
769 mutex_unlock(mutex);
770 res = thread_block(THREAD_CONTINUE_NULL);
771 mutex_lock(mutex);
772 }
773 return res;
774 }
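
/*
 * Illustrative sketch (annotation, not part of the original file): a
 * typical condition-wait loop built on thread_sleep_mutex().  The
 * object, its mutex and its "ready" flag are hypothetical.
 *
 *	mutex_lock(&object->lock);
 *	while (!object->ready) {
 *		(void) thread_sleep_mutex((event_t)&object->ready,
 *					  &object->lock, THREAD_UNINT);
 *		[returns with object->lock re-held; re-check the predicate]
 *	}
 *	mutex_unlock(&object->lock);
 */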
775
776 /*
777 * thread_sleep_mutex_deadline:
778 *
779 * Cause the current thread to wait until the specified event
780 * (or deadline) occurs. The specified mutex is unlocked before
781 * releasing the cpu. The mutex will be re-acquired before returning.
782 */
783 wait_result_t
784 thread_sleep_mutex_deadline(
785 event_t event,
786 mutex_t *mutex,
787 uint64_t deadline,
788 wait_interrupt_t interruptible)
789 {
790 wait_result_t res;
791
792 res = assert_wait_deadline(event, interruptible, deadline);
793 if (res == THREAD_WAITING) {
794 mutex_unlock(mutex);
795 res = thread_block(THREAD_CONTINUE_NULL);
796 mutex_lock(mutex);
797 }
798 return res;
799 }
800
801 /*
802 * thread_sleep_lock_write:
803 *
804 * Cause the current thread to wait until the specified event
805 * occurs. The specified (write) lock is unlocked before releasing
806 * the cpu. The (write) lock will be re-acquired before returning.
807 */
808 wait_result_t
809 thread_sleep_lock_write(
810 event_t event,
811 lock_t *lock,
812 wait_interrupt_t interruptible)
813 {
814 wait_result_t res;
815
816 res = assert_wait(event, interruptible);
817 if (res == THREAD_WAITING) {
818 lock_write_done(lock);
819 res = thread_block(THREAD_CONTINUE_NULL);
820 lock_write(lock);
821 }
822 return res;
823 }
824
825 /*
826 * thread_stop:
827 *
828 * Force a preemption point for a thread and wait
829 * for it to stop running. Arbitrates access among
830 * multiple stop requests. (released by thread_unstop)
831 *
832 * The thread must enter a wait state and stop via a
833 * separate means.
834 *
835 * Returns FALSE if interrupted.
836 */
837 boolean_t
838 thread_stop(
839 thread_t thread)
840 {
841 wait_result_t wresult;
842 spl_t s;
843
844 s = splsched();
845 wake_lock(thread);
846
847 while (thread->state & TH_SUSP) {
848 thread->wake_active = TRUE;
849 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
850 wake_unlock(thread);
851 splx(s);
852
853 if (wresult == THREAD_WAITING)
854 wresult = thread_block(THREAD_CONTINUE_NULL);
855
856 if (wresult != THREAD_AWAKENED)
857 return (FALSE);
858
859 s = splsched();
860 wake_lock(thread);
861 }
862
863 thread_lock(thread);
864 thread->state |= TH_SUSP;
865
866 while (thread->state & TH_RUN) {
867 processor_t processor = thread->last_processor;
868
869 if ( processor != PROCESSOR_NULL &&
870 processor->state == PROCESSOR_RUNNING &&
871 processor->active_thread == thread )
872 cause_ast_check(processor);
873 thread_unlock(thread);
874
875 thread->wake_active = TRUE;
876 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
877 wake_unlock(thread);
878 splx(s);
879
880 if (wresult == THREAD_WAITING)
881 wresult = thread_block(THREAD_CONTINUE_NULL);
882
883 if (wresult != THREAD_AWAKENED) {
884 thread_unstop(thread);
885 return (FALSE);
886 }
887
888 s = splsched();
889 wake_lock(thread);
890 thread_lock(thread);
891 }
892
893 thread_unlock(thread);
894 wake_unlock(thread);
895 splx(s);
896
897 return (TRUE);
898 }
899
900 /*
901 * thread_unstop:
902 *
903 * Release a previous stop request and set
904 * the thread running if appropriate.
905 *
906 * Use only after a successful stop operation.
907 */
908 void
909 thread_unstop(
910 thread_t thread)
911 {
912 spl_t s = splsched();
913
914 wake_lock(thread);
915 thread_lock(thread);
916
917 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
918 thread->state &= ~TH_SUSP;
919 thread_unblock(thread, THREAD_AWAKENED);
920
921 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
922 }
923 else
924 if (thread->state & TH_SUSP) {
925 thread->state &= ~TH_SUSP;
926
927 if (thread->wake_active) {
928 thread->wake_active = FALSE;
929 thread_unlock(thread);
930 wake_unlock(thread);
931 splx(s);
932
933 thread_wakeup(&thread->wake_active);
934 return;
935 }
936 }
937
938 thread_unlock(thread);
939 wake_unlock(thread);
940 splx(s);
941 }
942
943 /*
944 * thread_wait:
945 *
946 * Wait for a thread to stop running. (non-interruptible)
947 *
948 */
949 void
950 thread_wait(
951 thread_t thread)
952 {
953 wait_result_t wresult;
954 spl_t s = splsched();
955
956 wake_lock(thread);
957 thread_lock(thread);
958
959 while (thread->state & TH_RUN) {
960 processor_t processor = thread->last_processor;
961
962 if ( processor != PROCESSOR_NULL &&
963 processor->state == PROCESSOR_RUNNING &&
964 processor->active_thread == thread )
965 cause_ast_check(processor);
966 thread_unlock(thread);
967
968 thread->wake_active = TRUE;
969 wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
970 wake_unlock(thread);
971 splx(s);
972
973 if (wresult == THREAD_WAITING)
974 thread_block(THREAD_CONTINUE_NULL);
975
976 s = splsched();
977 wake_lock(thread);
978 thread_lock(thread);
979 }
980
981 thread_unlock(thread);
982 wake_unlock(thread);
983 splx(s);
984 }
985
986 /*
987 * Routine: clear_wait_internal
988 *
989 * Clear the wait condition for the specified thread.
990 * Start the thread executing if that is appropriate.
991 * Arguments:
992 * thread thread to awaken
993 * result Wakeup result the thread should see
994 * Conditions:
995 * At splsched
996 * the thread is locked.
997 * Returns:
998 * KERN_SUCCESS thread was rousted out of a wait
999 * KERN_FAILURE thread was waiting but could not be rousted
1000 * KERN_NOT_WAITING thread was not waiting
1001 */
1002 __private_extern__ kern_return_t
1003 clear_wait_internal(
1004 thread_t thread,
1005 wait_result_t wresult)
1006 {
1007 wait_queue_t wq = thread->wait_queue;
1008 int i = LockTimeOut;
1009
1010 do {
1011 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1012 return (KERN_FAILURE);
1013
1014 if (wq != WAIT_QUEUE_NULL) {
1015 if (wait_queue_lock_try(wq)) {
1016 wait_queue_pull_thread_locked(wq, thread, TRUE);
1017 /* wait queue unlocked, thread still locked */
1018 }
1019 else {
1020 thread_unlock(thread);
1021 delay(1);
1022
1023 thread_lock(thread);
1024 if (wq != thread->wait_queue)
1025 return (KERN_NOT_WAITING);
1026
1027 continue;
1028 }
1029 }
1030
1031 return (thread_go(thread, wresult));
1032 } while (--i > 0);
1033
1034 panic("clear_wait_internal: deadlock: thread=0x%x, wq=0x%x, cpu=%d\n",
1035 thread, wq, cpu_number());
1036
1037 return (KERN_FAILURE);
1038 }
1039
1040
1041 /*
1042 * clear_wait:
1043 *
1044 * Clear the wait condition for the specified thread. Start the thread
1045 * executing if that is appropriate.
1046 *
1047 * parameters:
1048 * thread thread to awaken
1049 * result Wakeup result the thread should see
1050 */
1051 kern_return_t
1052 clear_wait(
1053 thread_t thread,
1054 wait_result_t result)
1055 {
1056 kern_return_t ret;
1057 spl_t s;
1058
1059 s = splsched();
1060 thread_lock(thread);
1061 ret = clear_wait_internal(thread, result);
1062 thread_unlock(thread);
1063 splx(s);
1064 return ret;
1065 }
1066
1067
1068 /*
1069 * thread_wakeup_prim:
1070 *
1071 * Common routine for thread_wakeup, thread_wakeup_with_result,
1072 * and thread_wakeup_one.
1073 *
1074 */
1075 kern_return_t
1076 thread_wakeup_prim(
1077 event_t event,
1078 boolean_t one_thread,
1079 wait_result_t result)
1080 {
1081 register wait_queue_t wq;
1082 register int index;
1083
1084 index = wait_hash(event);
1085 wq = &wait_queues[index];
1086 if (one_thread)
1087 return (wait_queue_wakeup_one(wq, event, result));
1088 else
1089 return (wait_queue_wakeup_all(wq, event, result));
1090 }
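
/*
 * Annotation (not part of the original file): the convenience forms used
 * throughout the kernel are macros over this routine, declared in
 * sched_prim.h roughly as follows (shown from memory, not verbatim):
 *
 *	thread_wakeup(x)		  thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
 *	thread_wakeup_with_result(x, z)	  thread_wakeup_prim((x), FALSE, (z))
 *	thread_wakeup_one(x)		  thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
 */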
1091
1092 /*
1093 * thread_bind:
1094 *
1095 * Force a thread to execute on the specified processor.
1096 *
1097 * Returns the previous binding. PROCESSOR_NULL means
1098 * not bound.
1099 *
1100 * XXX - DO NOT export this to users - XXX
1101 */
1102 processor_t
1103 thread_bind(
1104 register thread_t thread,
1105 processor_t processor)
1106 {
1107 processor_t prev;
1108 run_queue_t runq = RUN_QUEUE_NULL;
1109 spl_t s;
1110
1111 s = splsched();
1112 thread_lock(thread);
1113 prev = thread->bound_processor;
1114 if (prev != PROCESSOR_NULL)
1115 runq = run_queue_remove(thread);
1116
1117 thread->bound_processor = processor;
1118
1119 if (runq != RUN_QUEUE_NULL)
1120 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1121 thread_unlock(thread);
1122 splx(s);
1123
1124 return (prev);
1125 }
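
/*
 * Illustrative sketch (annotation, not part of the original file): the
 * usual pattern for temporarily forcing the current thread onto one
 * processor (e.g. master_processor) and then restoring the old binding.
 * The new binding takes effect at the next dispatch, hence the
 * intervening thread_block().
 *
 *	processor_t prev;
 *
 *	prev = thread_bind(current_thread(), master_processor);
 *	thread_block(THREAD_CONTINUE_NULL);	[migrate to the bound processor]
 *	...do processor-local work...
 *	thread_bind(current_thread(), prev);	[restore the previous binding]
 */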
1126
1127 struct {
1128 uint32_t idle_pset_last,
1129 idle_pset_any,
1130 idle_bound;
1131
1132 uint32_t pset_self,
1133 pset_last,
1134 pset_other,
1135 bound_self,
1136 bound_other;
1137
1138 uint32_t realtime_self,
1139 realtime_last,
1140 realtime_other;
1141
1142 uint32_t missed_realtime,
1143 missed_other;
1144 } dispatch_counts;
1145
1146 /*
1147 * Select a thread for the current processor to run.
1148 *
1149 * May select the current thread, which must be locked.
1150 */
1151 thread_t
1152 thread_select(
1153 register processor_t processor)
1154 {
1155 register thread_t thread;
1156 processor_set_t pset;
1157 boolean_t other_runnable;
1158
1159 /*
1160 * Check for other non-idle runnable threads.
1161 */
1162 pset = processor->processor_set;
1163 thread = processor->active_thread;
1164
1165 /* Update the thread's priority */
1166 if (thread->sched_stamp != sched_tick)
1167 update_priority(thread);
1168
1169 processor->current_pri = thread->sched_pri;
1170
1171 simple_lock(&pset->sched_lock);
1172
1173 other_runnable = processor->runq.count > 0 || pset->runq.count > 0;
1174
1175 if ( thread->state == TH_RUN &&
1176 thread->processor_set == pset &&
1177 (thread->bound_processor == PROCESSOR_NULL ||
1178 thread->bound_processor == processor) ) {
1179 if ( thread->sched_pri >= BASEPRI_RTQUEUES &&
1180 first_timeslice(processor) ) {
1181 if (pset->runq.highq >= BASEPRI_RTQUEUES) {
1182 register run_queue_t runq = &pset->runq;
1183 register queue_t q;
1184
1185 q = runq->queues + runq->highq;
1186 if (((thread_t)q->next)->realtime.deadline <
1187 processor->deadline) {
1188 thread = (thread_t)q->next;
1189 ((queue_entry_t)thread)->next->prev = q;
1190 q->next = ((queue_entry_t)thread)->next;
1191 thread->runq = RUN_QUEUE_NULL;
1192 assert(thread->sched_mode & TH_MODE_PREEMPT);
1193 runq->count--; runq->urgency--;
1194 if (queue_empty(q)) {
1195 if (runq->highq != IDLEPRI)
1196 clrbit(MAXPRI - runq->highq, runq->bitmap);
1197 runq->highq = MAXPRI - ffsbit(runq->bitmap);
1198 }
1199 }
1200 }
1201
1202 processor->deadline = thread->realtime.deadline;
1203
1204 simple_unlock(&pset->sched_lock);
1205
1206 return (thread);
1207 }
1208
1209 if ( (!other_runnable ||
1210 (processor->runq.highq < thread->sched_pri &&
1211 pset->runq.highq < thread->sched_pri)) ) {
1212
1213 /* I am the highest priority runnable (non-idle) thread */
1214
1215 processor->deadline = UINT64_MAX;
1216
1217 simple_unlock(&pset->sched_lock);
1218
1219 return (thread);
1220 }
1221 }
1222
1223 if (other_runnable)
1224 thread = choose_thread(pset, processor);
1225 else {
1226 /*
1227 * Nothing is runnable, so set this processor idle if it
1228 * was running. Return its idle thread.
1229 */
1230 if (processor->state == PROCESSOR_RUNNING) {
1231 remqueue(&pset->active_queue, (queue_entry_t)processor);
1232 processor->state = PROCESSOR_IDLE;
1233
1234 enqueue_tail(&pset->idle_queue, (queue_entry_t)processor);
1235 pset->idle_count++;
1236 }
1237
1238 processor->deadline = UINT64_MAX;
1239
1240 thread = processor->idle_thread;
1241 }
1242
1243 simple_unlock(&pset->sched_lock);
1244
1245 return (thread);
1246 }
1247
1248 /*
1249 * Perform a context switch and start executing the new thread.
1250 *
1251 * Returns FALSE on failure, and the thread is re-dispatched.
1252 *
1253 * Called at splsched.
1254 */
1255
1256 #define funnel_release_check(thread, debug) \
1257 MACRO_BEGIN \
1258 if ((thread)->funnel_state & TH_FN_OWNED) { \
1259 (thread)->funnel_state = TH_FN_REFUNNEL; \
1260 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
1261 (thread)->funnel_lock, (debug), 0, 0, 0); \
1262 funnel_unlock((thread)->funnel_lock); \
1263 } \
1264 MACRO_END
1265
1266 #define funnel_refunnel_check(thread, debug) \
1267 MACRO_BEGIN \
1268 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
1269 kern_return_t result = (thread)->wait_result; \
1270 \
1271 (thread)->funnel_state = 0; \
1272 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
1273 (thread)->funnel_lock, (debug), 0, 0, 0); \
1274 funnel_lock((thread)->funnel_lock); \
1275 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
1276 (thread)->funnel_lock, (debug), 0, 0, 0); \
1277 (thread)->funnel_state = TH_FN_OWNED; \
1278 (thread)->wait_result = result; \
1279 } \
1280 MACRO_END
1281
1282 boolean_t
1283 thread_invoke(
1284 register thread_t old_thread,
1285 register thread_t new_thread,
1286 ast_t reason)
1287 {
1288 thread_continue_t new_cont, continuation = old_thread->continuation;
1289 void *new_param, *parameter = old_thread->parameter;
1290 processor_t processor;
1291 thread_t prev_thread;
1292
1293 if (get_preemption_level() != 0)
1294 panic("thread_invoke: preemption_level %d\n",
1295 get_preemption_level());
1296
1297 assert(old_thread == current_thread());
1298
1299 /*
1300 * Mark thread interruptible.
1301 */
1302 thread_lock(new_thread);
1303 new_thread->state &= ~TH_UNINT;
1304
1305 assert(thread_runnable(new_thread));
1306
1307 /*
1308 * Allow time constraint threads to hang onto
1309 * a stack.
1310 */
1311 if ( (old_thread->sched_mode & TH_MODE_REALTIME) &&
1312 !old_thread->reserved_stack ) {
1313 old_thread->reserved_stack = old_thread->kernel_stack;
1314 }
1315
1316 if (continuation != NULL) {
1317 if (!new_thread->kernel_stack) {
1318 /*
1319 * If the old thread is using a privileged stack,
1320 * check to see whether we can exchange it with
1321 * that of the new thread.
1322 */
1323 if ( old_thread->kernel_stack == old_thread->reserved_stack &&
1324 !new_thread->reserved_stack)
1325 goto need_stack;
1326
1327 /*
1328 * Context switch by performing a stack handoff.
1329 */
1330 new_cont = new_thread->continuation;
1331 new_thread->continuation = NULL;
1332 new_param = new_thread->parameter;
1333 new_thread->parameter = NULL;
1334
1335 processor = current_processor();
1336 processor->active_thread = new_thread;
1337 processor->current_pri = new_thread->sched_pri;
1338 new_thread->last_processor = processor;
1339 ast_context(new_thread);
1340 thread_unlock(new_thread);
1341
1342 current_task()->csw++;
1343
1344 old_thread->reason = reason;
1345
1346 processor->last_dispatch = mach_absolute_time();
1347 timer_event((uint32_t)processor->last_dispatch,
1348 &new_thread->system_timer);
1349
1350 thread_done(old_thread, new_thread, processor);
1351
1352 machine_stack_handoff(old_thread, new_thread);
1353
1354 thread_begin(new_thread, processor);
1355
1356 /*
1357 * Now dispatch the old thread.
1358 */
1359 thread_dispatch(old_thread);
1360
1361 counter_always(c_thread_invoke_hits++);
1362
1363 funnel_refunnel_check(new_thread, 2);
1364 (void) spllo();
1365
1366 assert(new_cont);
1367 call_continuation(new_cont, new_param, new_thread->wait_result);
1368 /*NOTREACHED*/
1369 }
1370 else
1371 if (new_thread == old_thread) {
1372 /* same thread but with continuation */
1373 counter(++c_thread_invoke_same);
1374 thread_unlock(new_thread);
1375
1376 funnel_refunnel_check(new_thread, 3);
1377 (void) spllo();
1378
1379 call_continuation(continuation, parameter, new_thread->wait_result);
1380 /*NOTREACHED*/
1381 }
1382 }
1383 else {
1384 /*
1385 * Check that the new thread has a stack
1386 */
1387 if (!new_thread->kernel_stack) {
1388 need_stack:
1389 if (!stack_alloc_try(new_thread)) {
1390 counter_always(c_thread_invoke_misses++);
1391 thread_unlock(new_thread);
1392 thread_stack_enqueue(new_thread);
1393 return (FALSE);
1394 }
1395 }
1396 else
1397 if (new_thread == old_thread) {
1398 counter(++c_thread_invoke_same);
1399 thread_unlock(new_thread);
1400 return (TRUE);
1401 }
1402 }
1403
1404 /*
1405 * Context switch by full context save.
1406 */
1407 processor = current_processor();
1408 processor->active_thread = new_thread;
1409 processor->current_pri = new_thread->sched_pri;
1410 new_thread->last_processor = processor;
1411 ast_context(new_thread);
1412 assert(thread_runnable(new_thread));
1413 thread_unlock(new_thread);
1414
1415 counter_always(c_thread_invoke_csw++);
1416 current_task()->csw++;
1417
1418 assert(old_thread->runq == RUN_QUEUE_NULL);
1419 old_thread->reason = reason;
1420
1421 processor->last_dispatch = mach_absolute_time();
1422 timer_event((uint32_t)processor->last_dispatch, &new_thread->system_timer);
1423
1424 thread_done(old_thread, new_thread, processor);
1425
1426 /*
1427 * This is where we actually switch register context,
1428 * and address space if required. Control will not
1429 * return here immediately.
1430 */
1431 prev_thread = machine_switch_context(old_thread, continuation, new_thread);
1432
1433 /*
1434 * We are still old_thread, possibly on a different processor,
1435 * and new_thread is now stale.
1436 */
1437 thread_begin(old_thread, old_thread->last_processor);
1438
1439 /*
1440 * Now dispatch the thread which resumed us.
1441 */
1442 thread_dispatch(prev_thread);
1443
1444 if (continuation) {
1445 funnel_refunnel_check(old_thread, 3);
1446 (void) spllo();
1447
1448 call_continuation(continuation, parameter, old_thread->wait_result);
1449 /*NOTREACHED*/
1450 }
1451
1452 return (TRUE);
1453 }
1454
1455 /*
1456 * thread_done:
1457 *
1458 * Perform calculations for thread
1459 * finishing execution on the current processor.
1460 *
1461 * Called at splsched.
1462 */
1463 void
1464 thread_done(
1465 thread_t old_thread,
1466 thread_t new_thread,
1467 processor_t processor)
1468 {
1469 if (!(old_thread->state & TH_IDLE)) {
1470 /*
1471 * Compute remainder of current quantum.
1472 */
1473 if ( first_timeslice(processor) &&
1474 processor->quantum_end > processor->last_dispatch )
1475 old_thread->current_quantum =
1476 (processor->quantum_end - processor->last_dispatch);
1477 else
1478 old_thread->current_quantum = 0;
1479
1480 if (old_thread->sched_mode & TH_MODE_REALTIME) {
1481 /*
1482 * Cancel the deadline if the thread has
1483 * consumed the entire quantum.
1484 */
1485 if (old_thread->current_quantum == 0) {
1486 old_thread->realtime.deadline = UINT64_MAX;
1487 old_thread->reason |= AST_QUANTUM;
1488 }
1489 }
1490 else {
1491 /*
1492 * For non-realtime threads treat a tiny
1493 * remaining quantum as an expired quantum
1494 * but include what's left next time.
1495 */
1496 if (old_thread->current_quantum < min_std_quantum) {
1497 old_thread->reason |= AST_QUANTUM;
1498 old_thread->current_quantum += std_quantum;
1499 }
1500 }
1501
1502 /*
1503 * If we are doing a direct handoff then
1504 * give the remainder of our quantum to
1505 * the next thread.
1506 */
1507 if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
1508 new_thread->current_quantum = old_thread->current_quantum;
1509 old_thread->reason |= AST_QUANTUM;
1510 old_thread->current_quantum = 0;
1511 }
1512
1513 old_thread->last_switch = processor->last_dispatch;
1514
1515 old_thread->computation_metered +=
1516 (old_thread->last_switch - old_thread->computation_epoch);
1517 }
1518 }
1519
1520 /*
1521 * thread_begin:
1522 *
1523 * Set up for thread beginning execution on
1524 * the current processor.
1525 *
1526 * Called at splsched.
1527 */
1528 void
1529 thread_begin(
1530 thread_t thread,
1531 processor_t processor)
1532 {
1533 if (!(thread->state & TH_IDLE)) {
1534 /*
1535 * Give the thread a new quantum
1536 * if none remaining.
1537 */
1538 if (thread->current_quantum == 0)
1539 thread_quantum_init(thread);
1540
1541 /*
1542 * Set up quantum timer and timeslice.
1543 */
1544 processor->quantum_end =
1545 (processor->last_dispatch + thread->current_quantum);
1546 timer_call_enter1(&processor->quantum_timer,
1547 thread, processor->quantum_end);
1548
1549 processor_timeslice_setup(processor, thread);
1550
1551 thread->last_switch = processor->last_dispatch;
1552
1553 thread->computation_epoch = thread->last_switch;
1554 }
1555 else {
1556 timer_call_cancel(&processor->quantum_timer);
1557 processor->timeslice = 1;
1558 }
1559 }
1560
1561 /*
1562 * thread_dispatch:
1563 *
1564 * Handle previous thread at context switch. Re-dispatch
1565 * if still running, otherwise update run state and perform
1566 * special actions.
1567 *
1568 * Called at splsched.
1569 */
1570 void
1571 thread_dispatch(
1572 register thread_t thread)
1573 {
1574 /*
1575 * If blocked at a continuation, discard
1576 * the stack.
1577 */
1578 #ifndef i386
1579 if (thread->continuation != NULL && thread->kernel_stack)
1580 stack_free(thread);
1581 #endif
1582
1583 if (!(thread->state & TH_IDLE)) {
1584 wake_lock(thread);
1585 thread_lock(thread);
1586
1587 if (!(thread->state & TH_WAIT)) {
1588 /*
1589 * Still running.
1590 */
1591 if (thread->reason & AST_QUANTUM)
1592 thread_setrun(thread, SCHED_TAILQ);
1593 else
1594 if (thread->reason & AST_PREEMPT)
1595 thread_setrun(thread, SCHED_HEADQ);
1596 else
1597 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1598
1599 thread->reason = AST_NONE;
1600
1601 thread_unlock(thread);
1602 wake_unlock(thread);
1603 }
1604 else {
1605 boolean_t wake;
1606
1607 /*
1608 * Waiting.
1609 */
1610 thread->state &= ~TH_RUN;
1611
1612 wake = thread->wake_active;
1613 thread->wake_active = FALSE;
1614
1615 if (thread->sched_mode & TH_MODE_TIMESHARE)
1616 pset_share_decr(thread->processor_set);
1617 pset_run_decr(thread->processor_set);
1618
1619 thread_unlock(thread);
1620 wake_unlock(thread);
1621
1622 if (thread->options & TH_OPT_CALLOUT)
1623 call_thread_block();
1624
1625 if (wake)
1626 thread_wakeup((event_t)&thread->wake_active);
1627
1628 if (thread->state & TH_TERMINATE)
1629 thread_terminate_enqueue(thread);
1630 }
1631 }
1632 }
1633
1634 /*
1635 * thread_block_reason:
1636 *
1637 * Forces a reschedule, blocking the caller if a wait
1638 * has been asserted.
1639 *
1640 * If a continuation is specified, then thread_invoke will
1641 * attempt to discard the thread's kernel stack. When the
1642 * thread resumes, it will execute the continuation function
1643 * on a new kernel stack.
1644 */
1645 counter(mach_counter_t c_thread_block_calls = 0;)
1646
1647 wait_result_t
1648 thread_block_reason(
1649 thread_continue_t continuation,
1650 void *parameter,
1651 ast_t reason)
1652 {
1653 register thread_t self = current_thread();
1654 register processor_t processor;
1655 register thread_t new_thread;
1656 spl_t s;
1657
1658 counter(++c_thread_block_calls);
1659
1660 s = splsched();
1661
1662 if (!(reason & AST_PREEMPT))
1663 funnel_release_check(self, 2);
1664
1665 processor = current_processor();
1666
1667 /*
1668 * Delay switching to the idle thread under certain conditions.
1669 */
1670 if (s != FALSE && (self->state & (TH_IDLE|TH_TERMINATE|TH_WAIT)) == TH_WAIT) {
1671 if ( processor->processor_set->processor_count > 1 &&
1672 processor->processor_set->runq.count == 0 &&
1673 processor->runq.count == 0 )
1674 processor = delay_idle(processor, self);
1675 }
1676
1677 /* If we're explicitly yielding, force a subsequent quantum */
1678 if (reason & AST_YIELD)
1679 processor->timeslice = 0;
1680
1681 /* We're handling all scheduling AST's */
1682 ast_off(AST_SCHEDULING);
1683
1684 self->continuation = continuation;
1685 self->parameter = parameter;
1686
1687 thread_lock(self);
1688 new_thread = thread_select(processor);
1689 assert(new_thread && thread_runnable(new_thread));
1690 thread_unlock(self);
1691 while (!thread_invoke(self, new_thread, reason)) {
1692 thread_lock(self);
1693 new_thread = thread_select(processor);
1694 assert(new_thread && thread_runnable(new_thread));
1695 thread_unlock(self);
1696 }
1697
1698 funnel_refunnel_check(self, 5);
1699 splx(s);
1700
1701 return (self->wait_result);
1702 }
1703
1704 /*
1705 * thread_block:
1706 *
1707 * Block the current thread if a wait has been asserted.
1708 */
1709 wait_result_t
1710 thread_block(
1711 thread_continue_t continuation)
1712 {
1713 return thread_block_reason(continuation, NULL, AST_NONE);
1714 }
1715
1716 wait_result_t
1717 thread_block_parameter(
1718 thread_continue_t continuation,
1719 void *parameter)
1720 {
1721 return thread_block_reason(continuation, parameter, AST_NONE);
1722 }
1723
1724 /*
1725 * thread_run:
1726 *
1727 * Switch directly from the current thread to the
1728 * new thread, handing off our quantum if appropriate.
1729 *
1730 * New thread must be runnable, and not on a run queue.
1731 *
1732 * Called at splsched.
1733 */
1734 int
1735 thread_run(
1736 thread_t self,
1737 thread_continue_t continuation,
1738 void *parameter,
1739 thread_t new_thread)
1740 {
1741 ast_t handoff = AST_HANDOFF;
1742
1743 funnel_release_check(self, 3);
1744
1745 self->continuation = continuation;
1746 self->parameter = parameter;
1747
1748 while (!thread_invoke(self, new_thread, handoff)) {
1749 register processor_t processor = current_processor();
1750
1751 thread_lock(self);
1752 new_thread = thread_select(processor);
1753 thread_unlock(self);
1754 handoff = AST_NONE;
1755 }
1756
1757 funnel_refunnel_check(self, 6);
1758
1759 return (self->wait_result);
1760 }
1761
1762 /*
1763 * thread_continue:
1764 *
1765 * Called at splsched when a thread first receives
1766 * a new stack after a continuation.
1767 */
1768 void
1769 thread_continue(
1770 register thread_t old_thread)
1771 {
1772 register thread_t self = current_thread();
1773 register thread_continue_t continuation;
1774 register void *parameter;
1775
1776 continuation = self->continuation;
1777 self->continuation = NULL;
1778 parameter = self->parameter;
1779 self->parameter = NULL;
1780
1781 thread_begin(self, self->last_processor);
1782
1783 if (old_thread != THREAD_NULL)
1784 thread_dispatch(old_thread);
1785
1786 funnel_refunnel_check(self, 4);
1787
1788 if (old_thread != THREAD_NULL)
1789 (void)spllo();
1790
1791 call_continuation(continuation, parameter, self->wait_result);
1792 /*NOTREACHED*/
1793 }
1794
1795 /*
1796 * Enqueue thread on run queue. Thread must be locked,
1797 * and not already be on a run queue. Returns TRUE
1798 * if a preemption is indicated based on the state
1799 * of the run queue.
1800 *
1801 * Run queue must be locked, see run_queue_remove()
1802 * for more info.
1803 */
1804 static boolean_t
1805 run_queue_enqueue(
1806 register run_queue_t rq,
1807 register thread_t thread,
1808 integer_t options)
1809 {
1810 register int whichq = thread->sched_pri;
1811 register queue_t queue = &rq->queues[whichq];
1812 boolean_t result = FALSE;
1813
1814 assert(whichq >= MINPRI && whichq <= MAXPRI);
1815
1816 assert(thread->runq == RUN_QUEUE_NULL);
1817 if (queue_empty(queue)) {
1818 enqueue_tail(queue, (queue_entry_t)thread);
1819
1820 setbit(MAXPRI - whichq, rq->bitmap);
1821 if (whichq > rq->highq) {
1822 rq->highq = whichq;
1823 result = TRUE;
1824 }
1825 }
1826 else
1827 if (options & SCHED_HEADQ)
1828 enqueue_head(queue, (queue_entry_t)thread);
1829 else
1830 enqueue_tail(queue, (queue_entry_t)thread);
1831
1832 thread->runq = rq;
1833 if (thread->sched_mode & TH_MODE_PREEMPT)
1834 rq->urgency++;
1835 rq->count++;
1836
1837 return (result);
1838 }
1839
1840 /*
1841 * Enqueue a thread for realtime execution, similar
1842 * to above. Handles preemption directly.
1843 */
1844 static void
1845 realtime_schedule_insert(
1846 register processor_set_t pset,
1847 register thread_t thread)
1848 {
1849 register run_queue_t rq = &pset->runq;
1850 register int whichq = thread->sched_pri;
1851 register queue_t queue = &rq->queues[whichq];
1852 uint64_t deadline = thread->realtime.deadline;
1853 boolean_t try_preempt = FALSE;
1854
1855 assert(whichq >= BASEPRI_REALTIME && whichq <= MAXPRI);
1856
1857 assert(thread->runq == RUN_QUEUE_NULL);
1858 if (queue_empty(queue)) {
1859 enqueue_tail(queue, (queue_entry_t)thread);
1860
1861 setbit(MAXPRI - whichq, rq->bitmap);
1862 if (whichq > rq->highq)
1863 rq->highq = whichq;
1864 try_preempt = TRUE;
1865 }
1866 else {
1867 register thread_t entry = (thread_t)queue_first(queue);
1868
1869 while (TRUE) {
1870 if ( queue_end(queue, (queue_entry_t)entry) ||
1871 deadline < entry->realtime.deadline ) {
1872 entry = (thread_t)queue_prev((queue_entry_t)entry);
1873 break;
1874 }
1875
1876 entry = (thread_t)queue_next((queue_entry_t)entry);
1877 }
1878
1879 if ((queue_entry_t)entry == queue)
1880 try_preempt = TRUE;
1881
1882 insque((queue_entry_t)thread, (queue_entry_t)entry);
1883 }
1884
1885 thread->runq = rq;
1886 assert(thread->sched_mode & TH_MODE_PREEMPT);
1887 rq->count++; rq->urgency++;
1888
1889 if (try_preempt) {
1890 register processor_t processor;
1891
1892 processor = current_processor();
1893 if ( pset == processor->processor_set &&
1894 (thread->sched_pri > processor->current_pri ||
1895 deadline < processor->deadline ) ) {
1896 dispatch_counts.realtime_self++;
1897 simple_unlock(&pset->sched_lock);
1898
1899 ast_on(AST_PREEMPT | AST_URGENT);
1900 return;
1901 }
1902
1903 if ( pset->processor_count > 1 ||
1904 pset != processor->processor_set ) {
1905 processor_t myprocessor, lastprocessor;
1906 queue_entry_t next;
1907
1908 myprocessor = processor;
1909 processor = thread->last_processor;
1910 if ( processor != myprocessor &&
1911 processor != PROCESSOR_NULL &&
1912 processor->processor_set == pset &&
1913 processor->state == PROCESSOR_RUNNING &&
1914 (thread->sched_pri > processor->current_pri ||
1915 deadline < processor->deadline ) ) {
1916 dispatch_counts.realtime_last++;
1917 cause_ast_check(processor);
1918 simple_unlock(&pset->sched_lock);
1919 return;
1920 }
1921
1922 lastprocessor = processor;
1923 queue = &pset->active_queue;
1924 processor = (processor_t)queue_first(queue);
1925 while (!queue_end(queue, (queue_entry_t)processor)) {
1926 next = queue_next((queue_entry_t)processor);
1927
1928 if ( processor != myprocessor &&
1929 processor != lastprocessor &&
1930 (thread->sched_pri > processor->current_pri ||
1931 deadline < processor->deadline ) ) {
1932 if (!queue_end(queue, next)) {
1933 remqueue(queue, (queue_entry_t)processor);
1934 enqueue_tail(queue, (queue_entry_t)processor);
1935 }
1936 dispatch_counts.realtime_other++;
1937 cause_ast_check(processor);
1938 simple_unlock(&pset->sched_lock);
1939 return;
1940 }
1941
1942 processor = (processor_t)next;
1943 }
1944 }
1945 }
1946
1947 simple_unlock(&pset->sched_lock);
1948 }
1949
1950 /*
1951 * thread_setrun:
1952 *
1953 * Dispatch thread for execution, directly onto an idle
1954 * processor if possible. Else put on appropriate run
1955 * queue. (local if bound, else processor set)
1956 *
1957 * Thread must be locked.
1958 */
1959 void
1960 thread_setrun(
1961 register thread_t new_thread,
1962 integer_t options)
1963 {
1964 register processor_t processor;
1965 register processor_set_t pset;
1966 register thread_t thread;
1967 ast_t preempt = (options & SCHED_PREEMPT)?
1968 AST_PREEMPT: AST_NONE;
1969
1970 assert(thread_runnable(new_thread));
1971
1972 /*
1973 * Update priority if needed.
1974 */
1975 if (new_thread->sched_stamp != sched_tick)
1976 update_priority(new_thread);
1977
1978 /*
1979 * Check for urgent preemption.
1980 */
1981 if (new_thread->sched_mode & TH_MODE_PREEMPT)
1982 preempt = (AST_PREEMPT | AST_URGENT);
1983
1984 assert(new_thread->runq == RUN_QUEUE_NULL);
1985
1986 if ((processor = new_thread->bound_processor) == PROCESSOR_NULL) {
1987 /*
1988 * First try to dispatch on
1989 * the last processor.
1990 */
1991 pset = new_thread->processor_set;
1992 processor = new_thread->last_processor;
1993 if ( pset->processor_count > 1 &&
1994 processor != PROCESSOR_NULL &&
1995 processor->state == PROCESSOR_IDLE ) {
1996 processor_lock(processor);
1997 simple_lock(&pset->sched_lock);
1998 if ( processor->processor_set == pset &&
1999 processor->state == PROCESSOR_IDLE ) {
2000 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2001 pset->idle_count--;
2002 processor->next_thread = new_thread;
2003 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2004 processor->deadline = new_thread->realtime.deadline;
2005 else
2006 processor->deadline = UINT64_MAX;
2007 processor->state = PROCESSOR_DISPATCHING;
2008 dispatch_counts.idle_pset_last++;
2009 simple_unlock(&pset->sched_lock);
2010 processor_unlock(processor);
2011 if (processor != current_processor())
2012 machine_signal_idle(processor);
2013 return;
2014 }
2015 processor_unlock(processor);
2016 }
2017 else
2018 simple_lock(&pset->sched_lock);
2019
2020 /*
2021 * Next pick any idle processor
2022 * in the processor set.
2023 */
2024 if (pset->idle_count > 0) {
2025 processor = (processor_t)dequeue_head(&pset->idle_queue);
2026 pset->idle_count--;
2027 processor->next_thread = new_thread;
2028 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2029 processor->deadline = new_thread->realtime.deadline;
2030 else
2031 processor->deadline = UINT64_MAX;
2032 processor->state = PROCESSOR_DISPATCHING;
2033 dispatch_counts.idle_pset_any++;
2034 simple_unlock(&pset->sched_lock);
2035 if (processor != current_processor())
2036 machine_signal_idle(processor);
2037 return;
2038 }
2039
2040 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2041 realtime_schedule_insert(pset, new_thread);
2042 else {
2043 if (!run_queue_enqueue(&pset->runq, new_thread, options))
2044 preempt = AST_NONE;
2045
2046 /*
2047 * Update the timesharing quanta.
2048 */
2049 timeshare_quanta_update(pset);
2050
2051 /*
2052 * Preempt check.
2053 */
2054 if (preempt != AST_NONE) {
2055 /*
2056 * First try the current processor
2057 * if it is a member of the correct
2058 * processor set.
2059 */
2060 processor = current_processor();
2061 thread = processor->active_thread;
2062 if ( pset == processor->processor_set &&
2063 csw_needed(thread, processor) ) {
2064 dispatch_counts.pset_self++;
2065 simple_unlock(&pset->sched_lock);
2066
2067 ast_on(preempt);
2068 return;
2069 }
2070
2071 /*
2072 * If that failed and we have other
2073 * processors available keep trying.
2074 */
2075 if ( pset->processor_count > 1 ||
2076 pset != processor->processor_set ) {
2077 queue_t queue = &pset->active_queue;
2078 processor_t myprocessor, lastprocessor;
2079 queue_entry_t next;
2080
2081 /*
2082 * Next try the last processor
2083 * dispatched on.
2084 */
2085 myprocessor = processor;
2086 processor = new_thread->last_processor;
2087 if ( processor != myprocessor &&
2088 processor != PROCESSOR_NULL &&
2089 processor->processor_set == pset &&
2090 processor->state == PROCESSOR_RUNNING &&
2091 new_thread->sched_pri > processor->current_pri ) {
2092 dispatch_counts.pset_last++;
2093 cause_ast_check(processor);
2094 simple_unlock(&pset->sched_lock);
2095 return;
2096 }
2097
2098 /*
2099 * Lastly, pick any other
2100 * available processor.
2101 */
2102 lastprocessor = processor;
2103 processor = (processor_t)queue_first(queue);
2104 while (!queue_end(queue, (queue_entry_t)processor)) {
2105 next = queue_next((queue_entry_t)processor);
2106
2107 if ( processor != myprocessor &&
2108 processor != lastprocessor &&
2109 new_thread->sched_pri >
2110 processor->current_pri ) {
2111 if (!queue_end(queue, next)) {
2112 remqueue(queue, (queue_entry_t)processor);
2113 enqueue_tail(queue, (queue_entry_t)processor);
2114 }
2115 dispatch_counts.pset_other++;
2116 cause_ast_check(processor);
2117 simple_unlock(&pset->sched_lock);
2118 return;
2119 }
2120
2121 processor = (processor_t)next;
2122 }
2123 }
2124 }
2125
2126 simple_unlock(&pset->sched_lock);
2127 }
2128 }
2129 else {
2130 /*
2131 * Bound, can only run on bound processor. Have to lock
2132 * processor here because it may not be the current one.
2133 */
2134 processor_lock(processor);
2135 pset = processor->processor_set;
2136 if (pset != PROCESSOR_SET_NULL) {
2137 simple_lock(&pset->sched_lock);
2138 if (processor->state == PROCESSOR_IDLE) {
2139 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2140 pset->idle_count--;
2141 processor->next_thread = new_thread;
2142 processor->deadline = UINT64_MAX;
2143 processor->state = PROCESSOR_DISPATCHING;
2144 dispatch_counts.idle_bound++;
2145 simple_unlock(&pset->sched_lock);
2146 processor_unlock(processor);
2147 if (processor != current_processor())
2148 machine_signal_idle(processor);
2149 return;
2150 }
2151 }
2152
2153 if (!run_queue_enqueue(&processor->runq, new_thread, options))
2154 preempt = AST_NONE;
2155
2156 if (preempt != AST_NONE) {
2157 if (processor == current_processor()) {
2158 thread = processor->active_thread;
2159 if (csw_needed(thread, processor)) {
2160 dispatch_counts.bound_self++;
2161 ast_on(preempt);
2162 }
2163 }
2164 else
2165 if ( processor->state == PROCESSOR_RUNNING &&
2166 new_thread->sched_pri > processor->current_pri ) {
2167 dispatch_counts.bound_other++;
2168 cause_ast_check(processor);
2169 }
2170 }
2171
2172 if (pset != PROCESSOR_SET_NULL)
2173 simple_unlock(&pset->sched_lock);
2174
2175 processor_unlock(processor);
2176 }
2177 }
2178
2179 /*
2180 * Check for a possible preemption point in
2181 * the (current) thread.
2182 *
2183 * Called at splsched.
2184 */
2185 ast_t
2186 csw_check(
2187 thread_t thread,
2188 processor_t processor)
2189 {
2190 int current_pri = thread->sched_pri;
2191 ast_t result = AST_NONE;
2192 run_queue_t runq;
2193
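/*
 * While the thread is still within its first timeslice, preempt only
 * for a strictly higher-priority (or any realtime) thread; once the
 * first timeslice has expired, an equal-priority thread is enough.
 */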
2194 if (first_timeslice(processor)) {
2195 runq = &processor->processor_set->runq;
2196 if (runq->highq >= BASEPRI_RTQUEUES)
2197 return (AST_PREEMPT | AST_URGENT);
2198
2199 if (runq->highq > current_pri) {
2200 if (runq->urgency > 0)
2201 return (AST_PREEMPT | AST_URGENT);
2202
2203 result |= AST_PREEMPT;
2204 }
2205
2206 runq = &processor->runq;
2207 if (runq->highq > current_pri) {
2208 if (runq->urgency > 0)
2209 return (AST_PREEMPT | AST_URGENT);
2210
2211 result |= AST_PREEMPT;
2212 }
2213 }
2214 else {
2215 runq = &processor->processor_set->runq;
2216 if (runq->highq >= current_pri) {
2217 if (runq->urgency > 0)
2218 return (AST_PREEMPT | AST_URGENT);
2219
2220 result |= AST_PREEMPT;
2221 }
2222
2223 runq = &processor->runq;
2224 if (runq->highq >= current_pri) {
2225 if (runq->urgency > 0)
2226 return (AST_PREEMPT | AST_URGENT);
2227
2228 result |= AST_PREEMPT;
2229 }
2230 }
2231
2232 if (result != AST_NONE)
2233 return (result);
2234
2235 if (thread->state & TH_SUSP)
2236 result |= AST_PREEMPT;
2237
2238 return (result);
2239 }
2240
2241 /*
2242 * set_sched_pri:
2243 *
2244 * Set the scheduled priority of the specified thread.
2245 *
2246 * This may cause the thread to change queues.
2247 *
2248 * Thread must be locked.
2249 */
2250 void
2251 set_sched_pri(
2252 thread_t thread,
2253 int priority)
2254 {
2255 register struct run_queue *rq = run_queue_remove(thread);
2256
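/*
 * A fixed-priority thread is marked urgent (TH_MODE_PREEMPT) when it
 * runs at or above BASEPRI_PREEMPT, or when an ordinary user task's
 * thread has been elevated above its task priority.
 */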
2257 if ( !(thread->sched_mode & TH_MODE_TIMESHARE) &&
2258 (priority >= BASEPRI_PREEMPT ||
2259 (thread->task_priority < MINPRI_KERNEL &&
2260 thread->task_priority >= BASEPRI_BACKGROUND &&
2261 priority > thread->task_priority) ) )
2262 thread->sched_mode |= TH_MODE_PREEMPT;
2263 else
2264 thread->sched_mode &= ~TH_MODE_PREEMPT;
2265
2266 thread->sched_pri = priority;
2267 if (rq != RUN_QUEUE_NULL)
2268 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2269 else
2270 if (thread->state & TH_RUN) {
2271 processor_t processor = thread->last_processor;
2272
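/* Not queued but runnable: re-evaluate preemption on the thread's processor. */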
2273 if (thread == current_thread()) {
2274 ast_t preempt = csw_check(thread, processor);
2275
2276 if (preempt != AST_NONE)
2277 ast_on(preempt);
2278 processor->current_pri = priority;
2279 }
2280 else
2281 if ( processor != PROCESSOR_NULL &&
2282 processor->active_thread == thread )
2283 cause_ast_check(processor);
2284 }
2285 }
2286
2287 #if 0
2288
2289 static void
2290 run_queue_check(
2291 run_queue_t rq,
2292 thread_t thread)
2293 {
2294 queue_t q;
2295 queue_entry_t qe;
2296
2297 if (rq != thread->runq)
2298 panic("run_queue_check: thread runq");
2299
2300 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
2301 panic("run_queue_check: thread sched_pri");
2302
2303 q = &rq->queues[thread->sched_pri];
2304 qe = queue_first(q);
2305 while (!queue_end(q, qe)) {
2306 if (qe == (queue_entry_t)thread)
2307 return;
2308
2309 qe = queue_next(qe);
2310 }
2311
2312 panic("run_queue_check: end");
2313 }
2314
2315 #endif /* 0 */
2316
2317 /*
2318 * run_queue_remove:
2319 *
2320 * Remove a thread from its current run queue and
2321 * return the run queue if successful.
2322 *
2323 * Thread must be locked.
2324 */
2325 run_queue_t
2326 run_queue_remove(
2327 thread_t thread)
2328 {
2329 register run_queue_t rq = thread->runq;
2330
2331 /*
2332 * If rq is RUN_QUEUE_NULL, the thread will stay out of the
2333 * run queues because the caller locked the thread. Otherwise
2334 * the thread is on a run queue, but could still be chosen for
2335 * dispatch and removed before we can lock that queue.
2336 */
2337 if (rq != RUN_QUEUE_NULL) {
2338 processor_set_t pset = thread->processor_set;
2339 processor_t processor = thread->bound_processor;
2340
2341 /*
2342 * The run queues are locked by the pset scheduling
2343 * lock, except when a processor is off-line, in which case
2344 * its local run queue is locked by the processor lock.
2345 */
2346 if (processor != PROCESSOR_NULL) {
2347 processor_lock(processor);
2348 pset = processor->processor_set;
2349 }
2350
2351 if (pset != PROCESSOR_SET_NULL)
2352 simple_lock(&pset->sched_lock);
2353
2354 if (rq == thread->runq) {
2355 /*
2356 * Thread is on a run queue and we have a lock on
2357 * that run queue.
2358 */
2359 remqueue(&rq->queues[0], (queue_entry_t)thread);
2360 rq->count--;
2361 if (thread->sched_mode & TH_MODE_PREEMPT)
2362 rq->urgency--;
2363 assert(rq->urgency >= 0);
2364
2365 if (queue_empty(rq->queues + thread->sched_pri)) {
2366 /* update run queue status */
2367 if (thread->sched_pri != IDLEPRI)
2368 clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
2369 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2370 }
2371
2372 thread->runq = RUN_QUEUE_NULL;
2373 }
2374 else {
2375 /*
2376 * The thread left the run queue before we could
2377 * lock the run queue.
2378 */
2379 assert(thread->runq == RUN_QUEUE_NULL);
2380 rq = RUN_QUEUE_NULL;
2381 }
2382
2383 if (pset != PROCESSOR_SET_NULL)
2384 simple_unlock(&pset->sched_lock);
2385
2386 if (processor != PROCESSOR_NULL)
2387 processor_unlock(processor);
2388 }
2389
2390 return (rq);
2391 }
2392
2393 /*
2394 * choose_thread:
2395 *
2396 * Remove a thread to execute from the run queues
2397 * and return it.
2398 *
2399 * Called with pset scheduling lock held.
2400 */
2401 static thread_t
2402 choose_thread(
2403 processor_set_t pset,
2404 processor_t processor)
2405 {
2406 register run_queue_t runq;
2407 register thread_t thread;
2408 register queue_t q;
2409
2410 runq = &processor->runq;
2411
2412 if (runq->count > 0 && runq->highq >= pset->runq.highq) {
2413 q = runq->queues + runq->highq;
2414
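/* Dequeue the first thread on the processor's highest-priority queue. */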
2415 thread = (thread_t)q->next;
2416 ((queue_entry_t)thread)->next->prev = q;
2417 q->next = ((queue_entry_t)thread)->next;
2418 thread->runq = RUN_QUEUE_NULL;
2419 runq->count--;
2420 if (thread->sched_mode & TH_MODE_PREEMPT)
2421 runq->urgency--;
2422 assert(runq->urgency >= 0);
2423 if (queue_empty(q)) {
2424 if (runq->highq != IDLEPRI)
2425 clrbit(MAXPRI - runq->highq, runq->bitmap);
2426 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2427 }
2428
2429 processor->deadline = UINT64_MAX;
2430
2431 return (thread);
2432 }
2433
2434 runq = &pset->runq;
2435
2436 assert(runq->count > 0);
2437 q = runq->queues + runq->highq;
2438
2439 thread = (thread_t)q->next;
2440 ((queue_entry_t)thread)->next->prev = q;
2441 q->next = ((queue_entry_t)thread)->next;
2442 thread->runq = RUN_QUEUE_NULL;
2443 runq->count--;
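/*
 * Record the chosen thread's realtime deadline on the processor,
 * or clear the deadline for a non-realtime thread.
 */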
2444 if (runq->highq >= BASEPRI_RTQUEUES)
2445 processor->deadline = thread->realtime.deadline;
2446 else
2447 processor->deadline = UINT64_MAX;
2448 if (thread->sched_mode & TH_MODE_PREEMPT)
2449 runq->urgency--;
2450 assert(runq->urgency >= 0);
2451 if (queue_empty(q)) {
2452 if (runq->highq != IDLEPRI)
2453 clrbit(MAXPRI - runq->highq, runq->bitmap);
2454 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2455 }
2456
2457 timeshare_quanta_update(pset);
2458
2459 return (thread);
2460 }
2461
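/*
 * delay_idle:
 *
 * Spin briefly before going fully idle, watching the local and
 * global run queue counts for work.  Interrupts are opened
 * periodically, and the spin ends once the time limit expires,
 * the wait is cleared, or work appears.
 */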
2462 static processor_t
2463 delay_idle(
2464 processor_t processor,
2465 thread_t self)
2466 {
2467 int *gcount, *lcount;
2468 uint64_t abstime, spin, limit;
2469
2470 lcount = &processor->runq.count;
2471 gcount = &processor->processor_set->runq.count;
2472
2473 abstime = mach_absolute_time();
2474 limit = abstime + delay_idle_limit;
2475 spin = abstime + delay_idle_spin;
2476
2477 timer_event((uint32_t)abstime, &processor->idle_thread->system_timer);
2478
2479 self->options |= TH_OPT_DELAYIDLE;
2480
2481 while ( *gcount == 0 && *lcount == 0 &&
2482 (self->state & TH_WAIT) != 0 &&
2483 abstime < limit ) {
2484 if (abstime >= spin) {
2485 (void)spllo();
2486
2487 (void)splsched();
2488 processor = current_processor();
2489 lcount = &processor->runq.count;
2490 gcount = &processor->processor_set->runq.count;
2491
2492 abstime = mach_absolute_time();
2493 spin = abstime + delay_idle_spin;
2494
2495 timer_event((uint32_t)abstime, &processor->idle_thread->system_timer);
2496 }
2497 else
2498 abstime = mach_absolute_time();
2499 }
2500
2501 timer_event((uint32_t)abstime, &self->system_timer);
2502
2503 self->options &= ~TH_OPT_DELAYIDLE;
2504
2505 return (processor);
2506 }
2507
2508 /*
2509 * no_dispatch_count counts the number of times processors go non-idle
2510 * without being dispatched. This should be very rare.
2511 */
2512 int no_dispatch_count = 0;
2513
2514 /*
2515 * This is the idle processor thread, which just looks for other threads
2516 * to execute.
2517 */
2518 void
2519 idle_thread(void)
2520 {
2521 register processor_t processor;
2522 register thread_t *threadp;
2523 register int *gcount;
2524 register int *lcount;
2525 register thread_t new_thread;
2526 register int state;
2527 register processor_set_t pset;
2528 ast_t *myast = ast_pending();
2529
2530 processor = current_processor();
2531
2532 threadp = &processor->next_thread;
2533 lcount = &processor->runq.count;
2534 gcount = &processor->processor_set->runq.count;
2535
2536
2537 (void)splsched(); /* Turn interruptions off */
2538
2539 #ifdef __ppc__
2540 pmsDown(); /* Step power down. Note: interruptions must be disabled for this call */
2541 #endif
2542
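/* Wait until a thread is dispatched to us or work appears on a run queue. */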
2543 while ( (*threadp == THREAD_NULL) &&
2544 (*gcount == 0) && (*lcount == 0) ) {
2545
2546 /* check for ASTs while we wait */
2547 if (*myast &~ (AST_SCHEDULING | AST_BSD)) {
2548 /* no ASTs for us */
2549 *myast &= AST_NONE;
2550 (void)spllo();
2551 }
2552 else
2553 machine_idle();
2554
2555 (void)splsched();
2556 }
2557
2558 /*
2559 * This is a cascade of if tests rather than a switch statement,
2560 * to avoid the switch bounds-checking code in the common case.
2561 */
2562 pset = processor->processor_set;
2563 simple_lock(&pset->sched_lock);
2564
2565 #ifdef __ppc__
2566 pmsStep(0); /* Step up out of idle power, may start timer for next step */
2567 #endif
2568
2569 state = processor->state;
2570 if (state == PROCESSOR_DISPATCHING) {
2571 /*
2572 * Common case -- cpu dispatched.
2573 */
2574 new_thread = *threadp;
2575 *threadp = (volatile thread_t) THREAD_NULL;
2576 processor->state = PROCESSOR_RUNNING;
2577 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2578
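/*
 * A realtime thread was dispatched to us, but a queued realtime
 * thread may have an earlier deadline; if so, run that thread
 * instead and requeue the one we were handed.
 */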
2579 if ( pset->runq.highq >= BASEPRI_RTQUEUES &&
2580 new_thread->sched_pri >= BASEPRI_RTQUEUES ) {
2581 register run_queue_t runq = &pset->runq;
2582 register queue_t q;
2583
2584 q = runq->queues + runq->highq;
2585 if (((thread_t)q->next)->realtime.deadline <
2586 processor->deadline) {
2587 thread_t thread = new_thread;
2588
2589 new_thread = (thread_t)q->next;
2590 ((queue_entry_t)new_thread)->next->prev = q;
2591 q->next = ((queue_entry_t)new_thread)->next;
2592 new_thread->runq = RUN_QUEUE_NULL;
2593 processor->deadline = new_thread->realtime.deadline;
2594 assert(new_thread->sched_mode & TH_MODE_PREEMPT);
2595 runq->count--; runq->urgency--;
2596 if (queue_empty(q)) {
2597 if (runq->highq != IDLEPRI)
2598 clrbit(MAXPRI - runq->highq, runq->bitmap);
2599 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2600 }
2601 dispatch_counts.missed_realtime++;
2602 simple_unlock(&pset->sched_lock);
2603
2604 thread_lock(thread);
2605 thread_setrun(thread, SCHED_HEADQ);
2606 thread_unlock(thread);
2607
2608 counter(c_idle_thread_handoff++);
2609 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2610 /*NOTREACHED*/
2611 }
2612 simple_unlock(&pset->sched_lock);
2613
2614 counter(c_idle_thread_handoff++);
2615 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2616 /*NOTREACHED*/
2617 }
2618
2619 if ( processor->runq.highq > new_thread->sched_pri ||
2620 pset->runq.highq > new_thread->sched_pri ) {
2621 thread_t thread = new_thread;
2622
2623 new_thread = choose_thread(pset, processor);
2624 dispatch_counts.missed_other++;
2625 simple_unlock(&pset->sched_lock);
2626
2627 thread_lock(thread);
2628 thread_setrun(thread, SCHED_HEADQ);
2629 thread_unlock(thread);
2630
2631 counter(c_idle_thread_handoff++);
2632 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2633 /* NOTREACHED */
2634 }
2635 else {
2636 simple_unlock(&pset->sched_lock);
2637
2638 counter(c_idle_thread_handoff++);
2639 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2640 /* NOTREACHED */
2641 }
2642 }
2643 else
2644 if (state == PROCESSOR_IDLE) {
2645 /*
2646 * Processor was not dispatched (rare).
2647 * Set it running again and force a
2648 * reschedule.
2649 */
2650 no_dispatch_count++;
2651 pset->idle_count--;
2652 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2653 processor->state = PROCESSOR_RUNNING;
2654 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2655 simple_unlock(&pset->sched_lock);
2656
2657 counter(c_idle_thread_block++);
2658 thread_block((thread_continue_t)idle_thread);
2659 /* NOTREACHED */
2660 }
2661 else
2662 if (state == PROCESSOR_SHUTDOWN) {
2663 /*
2664 * Going off-line. Force a
2665 * reschedule.
2666 */
2667 if ((new_thread = (thread_t)*threadp) != THREAD_NULL) {
2668 *threadp = (volatile thread_t) THREAD_NULL;
2669 processor->deadline = UINT64_MAX;
2670 simple_unlock(&pset->sched_lock);
2671
2672 thread_lock(new_thread);
2673 thread_setrun(new_thread, SCHED_HEADQ);
2674 thread_unlock(new_thread);
2675 }
2676 else
2677 simple_unlock(&pset->sched_lock);
2678
2679 counter(c_idle_thread_block++);
2680 thread_block((thread_continue_t)idle_thread);
2681 /* NOTREACHED */
2682 }
2683
2684 simple_unlock(&pset->sched_lock);
2685
2686 panic("idle_thread: state %d\n", processor->state);
2687 /*NOTREACHED*/
2688 }
2689
2690 kern_return_t
2691 idle_thread_create(
2692 processor_t processor)
2693 {
2694 kern_return_t result;
2695 thread_t thread;
2696 spl_t s;
2697
2698 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
2699 if (result != KERN_SUCCESS)
2700 return (result);
2701
2702 s = splsched();
2703 thread_lock(thread);
2704 thread->bound_processor = processor;
2705 processor->idle_thread = thread;
2706 thread->sched_pri = thread->priority = IDLEPRI;
2707 thread->state = (TH_RUN | TH_IDLE);
2708 thread_unlock(thread);
2709 splx(s);
2710
2711 thread_deallocate(thread);
2712
2713 return (KERN_SUCCESS);
2714 }
2715
2716 static uint64_t sched_tick_deadline;
2717
2718 /*
2719 * sched_startup:
2720 *
2721 * Kicks off scheduler services.
2722 *
2723 * Called at splsched.
2724 */
2725 void
2726 sched_startup(void)
2727 {
2728 kern_return_t result;
2729 thread_t thread;
2730
2731 result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
2732 if (result != KERN_SUCCESS)
2733 panic("sched_startup");
2734
2735 thread_deallocate(thread);
2736
2737 /*
2738 * Yield to the sched_tick_thread while it times
2739 * a series of context switches back. It stores
2740 * the baseline value in sched_cswtime.
2741 *
2742 * The current thread is the only other thread
2743 * active at this point.
2744 */
2745 while (sched_cswtime == 0)
2746 thread_block(THREAD_CONTINUE_NULL);
2747
2748 thread_daemon_init();
2749
2750 thread_call_initialize();
2751 }
2752
2753 /*
2754 * sched_tick_thread:
2755 *
2756 * Perform periodic bookkeeping functions about ten
2757 * times per second.
2758 */
2759 static void
2760 sched_tick_continue(void)
2761 {
2762 uint64_t abstime = mach_absolute_time();
2763
2764 sched_tick++;
2765
2766 /*
2767 * Compute various averages.
2768 */
2769 compute_averages();
2770
2771 /*
2772 * Scan the run queues for threads which
2773 * may need to be updated.
2774 */
2775 thread_update_scan();
2776
2777 clock_deadline_for_periodic_event(sched_tick_interval, abstime,
2778 &sched_tick_deadline);
2779
2780 assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
2781 thread_block((thread_continue_t)sched_tick_continue);
2782 /*NOTREACHED*/
2783 }
2784
2785 /*
2786 * Time a series of context switches to determine
2787 * a baseline. Toss the high and low samples and return
2788 * the average one-way value.
2789 */
2790 static uint32_t
2791 time_cswitch(void)
2792 {
2793 uint32_t new, hi, low, accum;
2794 uint64_t abstime;
2795 int i, tries = 7;
2796
2797 accum = hi = low = 0;
2798 for (i = 0; i < tries; ++i) {
2799 abstime = mach_absolute_time();
2800 thread_block(THREAD_CONTINUE_NULL);
2801
2802 new = mach_absolute_time() - abstime;
2803
2804 if (i == 0)
2805 accum = hi = low = new;
2806 else {
2807 if (new < low)
2808 low = new;
2809 else
2810 if (new > hi)
2811 hi = new;
2812 accum += new;
2813 }
2814 }
2815
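/* Drop the high and low samples, average the rest, and halve for the one-way time. */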
2816 return ((accum - hi - low) / (2 * (tries - 2)));
2817 }
2818
2819 void
2820 sched_tick_thread(void)
2821 {
2822 sched_cswtime = time_cswitch();
2823
2824 sched_tick_deadline = mach_absolute_time();
2825
2826 sched_tick_continue();
2827 /*NOTREACHED*/
2828 }
2829
2830 /*
2831 * thread_update_scan / runq_scan:
2832 *
2833 * Scan the run queues to account for timesharing threads
2834 * which need to be updated.
2835 *
2836 * Scanner runs in two passes. Pass one squirrels likely
2837 * threads away in an array, pass two does the update.
2838 *
2839 * This is necessary because the run queue is locked for
2840 * the candidate scan, but the thread is locked for the update.
2841 *
2842 * Array should be sized to make forward progress, without
2843 * disabling preemption for long periods.
2844 */
2845
2846 #define THREAD_UPDATE_SIZE 128
2847
2848 static thread_t thread_update_array[THREAD_UPDATE_SIZE];
2849 static int thread_update_count = 0;
2850
2851 /*
2852 * Scan a runq for candidate threads.
2853 *
2854 * Returns TRUE if retry is needed.
2855 */
2856 static boolean_t
2857 runq_scan(
2858 run_queue_t runq)
2859 {
2860 register int count;
2861 register queue_t q;
2862 register thread_t thread;
2863
2864 if ((count = runq->count) > 0) {
2865 q = runq->queues + runq->highq;
2866 while (count > 0) {
2867 queue_iterate(q, thread, thread_t, links) {
2868 if ( thread->sched_stamp != sched_tick &&
2869 (thread->sched_mode & TH_MODE_TIMESHARE) ) {
2870 if (thread_update_count == THREAD_UPDATE_SIZE)
2871 return (TRUE);
2872
2873 thread_update_array[thread_update_count++] = thread;
2874 thread_reference_internal(thread);
2875 }
2876
2877 count--;
2878 }
2879
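/* Step down to the next lower priority queue. */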
2880 q--;
2881 }
2882 }
2883
2884 return (FALSE);
2885 }
2886
2887 static void
2888 thread_update_scan(void)
2889 {
2890 register boolean_t restart_needed;
2891 register processor_set_t pset = &default_pset;
2892 register processor_t processor;
2893 register thread_t thread;
2894 spl_t s;
2895
2896 do {
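/* Pass one: collect candidate threads under the run queue locks. */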
2897 s = splsched();
2898 simple_lock(&pset->sched_lock);
2899 restart_needed = runq_scan(&pset->runq);
2900 simple_unlock(&pset->sched_lock);
2901
2902 if (!restart_needed) {
2903 simple_lock(&pset->sched_lock);
2904 processor = (processor_t)queue_first(&pset->processors);
2905 while (!queue_end(&pset->processors, (queue_entry_t)processor)) {
2906 if ((restart_needed = runq_scan(&processor->runq)) != 0)
2907 break;
2908
2909 thread = processor->idle_thread;
2910 if (thread->sched_stamp != sched_tick) {
2911 if (thread_update_count == THREAD_UPDATE_SIZE) {
2912 restart_needed = TRUE;
2913 break;
2914 }
2915
2916 thread_update_array[thread_update_count++] = thread;
2917 thread_reference_internal(thread);
2918 }
2919
2920 processor = (processor_t)queue_next(&processor->processors);
2921 }
2922 simple_unlock(&pset->sched_lock);
2923 }
2924 splx(s);
2925
2926 /*
2927 * Ok, we now have a collection of candidates -- fix them.
2928 */
2929 while (thread_update_count > 0) {
2930 thread = thread_update_array[--thread_update_count];
2931 thread_update_array[thread_update_count] = THREAD_NULL;
2932
2933 s = splsched();
2934 thread_lock(thread);
2935 if ( !(thread->state & (TH_WAIT|TH_SUSP)) &&
2936 thread->sched_stamp != sched_tick )
2937 update_priority(thread);
2938 thread_unlock(thread);
2939 splx(s);
2940
2941 thread_deallocate(thread);
2942 }
2943 } while (restart_needed);
2944 }
2945
2946 /*
2947 * Just in case someone doesn't use the macro
2948 */
2949 #undef thread_wakeup
2950 void
2951 thread_wakeup(
2952 event_t x);
2953
2954 void
2955 thread_wakeup(
2956 event_t x)
2957 {
2958 thread_wakeup_with_result(x, THREAD_AWAKENED);
2959 }
2960
2961 boolean_t
2962 preemption_enabled(void)
2963 {
2964 return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
2965 }
2966
2967 #if DEBUG
2968 static boolean_t
2969 thread_runnable(
2970 thread_t thread)
2971 {
2972 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
2973 }
2974 #endif /* DEBUG */
2975
2976 #if MACH_KDB
2977 #include <ddb/db_output.h>
2978 #define printf kdbprintf
2979 void db_sched(void);
2980
2981 void
2982 db_sched(void)
2983 {
2984 iprintf("Scheduling Statistics:\n");
2985 db_indent += 2;
2986 iprintf("Thread invocations: csw %d same %d\n",
2987 c_thread_invoke_csw, c_thread_invoke_same);
2988 #if MACH_COUNTERS
2989 iprintf("Thread block: calls %d\n",
2990 c_thread_block_calls);
2991 iprintf("Idle thread:\n\thandoff %d block %d no_dispatch %d\n",
2992 c_idle_thread_handoff,
2993 c_idle_thread_block, no_dispatch_count);
2994 iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
2995 #endif /* MACH_COUNTERS */
2996 db_indent -= 2;
2997 }
2998
2999 #include <ddb/db_output.h>
3000 void db_show_thread_log(void);
3001
3002 void
3003 db_show_thread_log(void)
3004 {
3005 }
3006 #endif /* MACH_KDB */