1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_FREE_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: sched_prim.c
54 * Author: Avadis Tevanian, Jr.
55 * Date: 1986
56 *
57 * Scheduling primitives
58 *
59 */
60
61 #include <debug.h>
62 #include <mach_kdb.h>
63
64 #include <ddb/db_output.h>
65
66 #include <mach/mach_types.h>
67 #include <mach/machine.h>
68 #include <mach/policy.h>
69 #include <mach/sync_policy.h>
70
71 #include <machine/machine_routines.h>
72 #include <machine/sched_param.h>
73
74 #include <kern/kern_types.h>
75 #include <kern/clock.h>
76 #include <kern/counters.h>
77 #include <kern/cpu_number.h>
78 #include <kern/cpu_data.h>
79 #include <kern/debug.h>
80 #include <kern/lock.h>
81 #include <kern/macro_help.h>
82 #include <kern/machine.h>
83 #include <kern/misc_protos.h>
84 #include <kern/processor.h>
85 #include <kern/queue.h>
86 #include <kern/sched.h>
87 #include <kern/sched_prim.h>
88 #include <kern/syscall_subr.h>
89 #include <kern/task.h>
90 #include <kern/thread.h>
91 #include <kern/wait_queue.h>
92
93 #include <vm/pmap.h>
94 #include <vm/vm_kern.h>
95 #include <vm/vm_map.h>
96
97 #include <sys/kdebug.h>
98
99 #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
100 int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
101
102 #define MAX_UNSAFE_QUANTA 800
103 int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
104
105 #define MAX_POLL_QUANTA 2
106 int max_poll_quanta = MAX_POLL_QUANTA;
107
108 #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
109 int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
110
111 uint64_t max_unsafe_computation;
112 uint32_t sched_safe_duration;
113 uint64_t max_poll_computation;
114
115 uint32_t std_quantum;
116 uint32_t min_std_quantum;
117
118 uint32_t std_quantum_us;
119
120 uint32_t max_rt_quantum;
121 uint32_t min_rt_quantum;
122
123 uint32_t sched_cswtime;
124
125 static uint32_t delay_idle_limit, delay_idle_spin;
126 static processor_t delay_idle(
127 processor_t processor,
128 thread_t self);
129
130 unsigned sched_tick;
131 uint32_t sched_tick_interval;
132
133 uint32_t sched_pri_shift;
134
135 /* Forwards */
136 void wait_queues_init(void);
137
138 static void load_shift_init(void);
139
140 static thread_t choose_thread(
141 processor_set_t pset,
142 processor_t processor);
143
144 static void thread_update_scan(void);
145
146 #if DEBUG
147 static
148 boolean_t thread_runnable(
149 thread_t thread);
150
151 #endif /*DEBUG*/
152
153
154 /*
155 * State machine
156 *
157 * states are combinations of:
158 * R running
159 * W waiting (or on wait queue)
160 * N non-interruptible
161 * O swapped out
162 * I being swapped in
163 *
164 * init    action
165 *          assert_wait    thread_block   clear_wait     swapout  swapin
166 *
167 * R        RW, RWN        R; setrun      -              -
168 * RN       RWN            RN; setrun     -              -
169 *
170 * RW                      W              R              -
171 * RWN                     WN             RN             -
172 *
173 * W                                      R; setrun      WO
174 * WN                                     RN; setrun     -
175 *
176 * RO                                     -              -        R
177 *
178 */
179
180 /*
181 * Waiting protocols and implementation:
182 *
183 * Each thread may be waiting for exactly one event; this event
184 * is set using assert_wait(). That thread may be awakened either
185 * by performing a thread_wakeup_prim() on its event,
186 * or by directly waking that thread up with clear_wait().
187 *
188 * The implementation of wait events uses a hash table. Each
189 * bucket is a queue of threads having the same hash function
190 * value; the chain for the queue (linked list) is the run queue
191 * field. [It is not possible to be waiting and runnable at the
192 * same time.]
193 *
194 * Locks on both the thread and on the hash buckets govern the
195 * wait event field and the queue chain field. Because wakeup
196 * operations only have the event as an argument, the event hash
197 * bucket must be locked before any thread.
198 *
199 * Scheduling operations may also occur at interrupt level; therefore,
200 * interrupts below splsched() must be prevented when holding
201 * thread or hash bucket locks.
202 *
203 * The wait event hash table declarations are as follows:
204 */
205
206 #define NUMQUEUES 59
207
208 struct wait_queue wait_queues[NUMQUEUES];
209
210 #define wait_hash(event) \
211 ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
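 
/*
 * Illustrative usage sketch (not part of this file): the canonical
 * pairing of the primitives above.  The object and its "ready" flag
 * are hypothetical, and the locking that would normally protect the
 * flag is elided.
 *
 *	waiter: assert first, re-check the condition, then block
 *		assert_wait((event_t)&object->ready, THREAD_UNINT);
 *		if (!object->ready)
 *			thread_block(THREAD_CONTINUE_NULL);
 *		else
 *			clear_wait(current_thread(), THREAD_AWAKENED);
 *
 *	waker: hashes to the same wait_queues[wait_hash(&object->ready)]
 *		object->ready = TRUE;
 *		thread_wakeup((event_t)&object->ready);
 */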
212
213 int8_t sched_load_shifts[NRQS];
214
215 void
216 sched_init(void)
217 {
218 /*
219 * Calculate the timeslicing quantum
220 * in us.
221 */
222 if (default_preemption_rate < 1)
223 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
224 std_quantum_us = (1000 * 1000) / default_preemption_rate;
225
226 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
227
228 sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
229 (1 << SCHED_TICK_SHIFT);
230
231 wait_queues_init();
232 load_shift_init();
233 pset_init(&default_pset);
234 sched_tick = 0;
235 ast_init();
236 }
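 
/*
 * Worked example with the default tunables: default_preemption_rate
 * is 100, so std_quantum_us = (1000 * 1000) / 100 = 10000 us, i.e. a
 * 10 ms timeslice, and sched_safe_duration = (2 * 800 / 100) *
 * (1 << SCHED_TICK_SHIFT) = 16 << SCHED_TICK_SHIFT scheduler ticks.
 */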
237
238 void
239 sched_timebase_init(void)
240 {
241 uint64_t abstime;
242 uint32_t shift;
243
244 /* standard timeslicing quantum */
245 clock_interval_to_absolutetime_interval(
246 std_quantum_us, NSEC_PER_USEC, &abstime);
247 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
248 std_quantum = abstime;
249
250 /* smallest remaining quantum (250 us) */
251 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
252 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
253 min_std_quantum = abstime;
254
256 /* smallest rt computation (50 us) */
256 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
257 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
258 min_rt_quantum = abstime;
259
260 /* maximum rt computation (50 ms) */
261 clock_interval_to_absolutetime_interval(
262 50, 1000*NSEC_PER_USEC, &abstime);
263 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
264 max_rt_quantum = abstime;
265
266 /* scheduler tick interval */
267 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
268 NSEC_PER_USEC, &abstime);
269 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
270 sched_tick_interval = abstime;
271
272 /*
273 * Compute conversion factor from usage to
274 * timesharing priorities with 5/8 ** n aging.
275 */
276 abstime = (abstime * 5) / 3;
277 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
278 abstime >>= 1;
279 sched_pri_shift = shift;
280
281 max_unsafe_computation = max_unsafe_quanta * std_quantum;
282 max_poll_computation = max_poll_quanta * std_quantum;
283
284 /* delay idle constant(s) (60, 1 us) */
285 clock_interval_to_absolutetime_interval(60, NSEC_PER_USEC, &abstime);
286 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
287 delay_idle_limit = abstime;
288
289 clock_interval_to_absolutetime_interval(1, NSEC_PER_USEC, &abstime);
290 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
291 delay_idle_spin = abstime;
292 }
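 
/*
 * Worked example, continuing with the 10 ms default quantum:
 * max_unsafe_computation = max_unsafe_quanta * std_quantum
 * = 800 * 10 ms = 8 s, and max_poll_computation = max_poll_quanta *
 * std_quantum = 2 * 10 ms = 20 ms; these bound how long a thread may
 * compute before the unsafe/poll fail-safes elsewhere in the
 * scheduler take effect.
 */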
293
294 void
295 wait_queues_init(void)
296 {
297 register int i;
298
299 for (i = 0; i < NUMQUEUES; i++) {
300 wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
301 }
302 }
303
304 /*
305 * Set up values for timeshare
306 * loading factors.
307 */
308 static void
309 load_shift_init(void)
310 {
311 int8_t k, *p = sched_load_shifts;
312 uint32_t i, j;
313
314 *p++ = INT8_MIN; *p++ = 0;
315
316 for (i = j = 2, k = 1; i < NRQS; ++k) {
317 for (j <<= 1; i < j; ++i)
318 *p++ = k;
319 }
320 }
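 
/*
 * The loop above fills sched_load_shifts with (roughly) log2 of the
 * index:  [0] = INT8_MIN, [1] = 0, [2..3] = 1, [4..7] = 2,
 * [8..15] = 3, and so on up to NRQS entries.
 */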
321
322 /*
323 * Thread wait timer expiration.
324 */
325 void
326 thread_timer_expire(
327 void *p0,
328 __unused void *p1)
329 {
330 thread_t thread = p0;
331 spl_t s;
332
333 s = splsched();
334 thread_lock(thread);
335 if (--thread->wait_timer_active == 0) {
336 if (thread->wait_timer_is_set) {
337 thread->wait_timer_is_set = FALSE;
338 clear_wait_internal(thread, THREAD_TIMED_OUT);
339 }
340 }
341 thread_unlock(thread);
342 splx(s);
343 }
344
345 /*
346 * thread_set_timer:
347 *
348 * Set a timer for the current thread, if the thread
349 * is ready to wait. Must be called between assert_wait()
350 * and thread_block().
351 */
352 void
353 thread_set_timer(
354 uint32_t interval,
355 uint32_t scale_factor)
356 {
357 thread_t thread = current_thread();
358 uint64_t deadline;
359 spl_t s;
360
361 s = splsched();
362 thread_lock(thread);
363 if ((thread->state & TH_WAIT) != 0) {
364 clock_interval_to_deadline(interval, scale_factor, &deadline);
365 if (!timer_call_enter(&thread->wait_timer, deadline))
366 thread->wait_timer_active++;
367 thread->wait_timer_is_set = TRUE;
368 }
369 thread_unlock(thread);
370 splx(s);
371 }
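 
/*
 * Illustrative sketch of the contract described above (the event and
 * the 10 ms interval are hypothetical):
 *
 *	assert_wait(event, THREAD_UNINT);
 *	thread_set_timer(10, 1000 * NSEC_PER_USEC);
 *	wresult = thread_block(THREAD_CONTINUE_NULL);
 *	if (wresult != THREAD_TIMED_OUT)
 *		thread_cancel_timer();
 */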
372
373 void
374 thread_set_timer_deadline(
375 uint64_t deadline)
376 {
377 thread_t thread = current_thread();
378 spl_t s;
379
380 s = splsched();
381 thread_lock(thread);
382 if ((thread->state & TH_WAIT) != 0) {
383 if (!timer_call_enter(&thread->wait_timer, deadline))
384 thread->wait_timer_active++;
385 thread->wait_timer_is_set = TRUE;
386 }
387 thread_unlock(thread);
388 splx(s);
389 }
390
391 void
392 thread_cancel_timer(void)
393 {
394 thread_t thread = current_thread();
395 spl_t s;
396
397 s = splsched();
398 thread_lock(thread);
399 if (thread->wait_timer_is_set) {
400 if (timer_call_cancel(&thread->wait_timer))
401 thread->wait_timer_active--;
402 thread->wait_timer_is_set = FALSE;
403 }
404 thread_unlock(thread);
405 splx(s);
406 }
407
408 /*
409 * thread_unblock:
410 *
411 * Unblock thread on wake up.
412 *
413 * Returns TRUE if the thread is still running.
414 *
415 * Thread must be locked.
416 */
417 boolean_t
418 thread_unblock(
419 thread_t thread,
420 wait_result_t wresult)
421 {
422 boolean_t result = FALSE;
423
424 /*
425 * Set wait_result.
426 */
427 thread->wait_result = wresult;
428
429 /*
430 * Cancel pending wait timer.
431 */
432 if (thread->wait_timer_is_set) {
433 if (timer_call_cancel(&thread->wait_timer))
434 thread->wait_timer_active--;
435 thread->wait_timer_is_set = FALSE;
436 }
437
438 /*
439 * Update scheduling state.
440 */
441 thread->state &= ~(TH_WAIT|TH_UNINT);
442
443 if (!(thread->state & TH_RUN)) {
444 thread->state |= TH_RUN;
445
446 /*
447 * Mark unblocked if call out.
448 */
449 if (thread->options & TH_OPT_CALLOUT)
450 call_thread_unblock();
451
452 /*
453 * Update pset run counts.
454 */
455 pset_run_incr(thread->processor_set);
456 if (thread->sched_mode & TH_MODE_TIMESHARE)
457 pset_share_incr(thread->processor_set);
458 }
459 else
460 result = TRUE;
461
462 /*
463 * Calculate deadline for real-time threads.
464 */
465 if (thread->sched_mode & TH_MODE_REALTIME) {
466 thread->realtime.deadline = mach_absolute_time();
467 thread->realtime.deadline += thread->realtime.constraint;
468 }
469
470 /*
471 * Clear old quantum, fail-safe computation, etc.
472 */
473 thread->current_quantum = 0;
474 thread->computation_metered = 0;
475 thread->reason = AST_NONE;
476
477 KERNEL_DEBUG_CONSTANT(
478 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
479 (int)thread, (int)thread->sched_pri, 0, 0, 0);
480
481 return (result);
482 }
483
484 /*
485 * Routine: thread_go
486 * Purpose:
487 * Unblock and dispatch thread.
488 * Conditions:
489 * thread lock held, IPC locks may be held.
490 * thread must have been pulled from wait queue under same lock hold.
491 * Returns:
492 * KERN_SUCCESS - Thread was set running
493 * KERN_NOT_WAITING - Thread was not waiting
494 */
495 kern_return_t
496 thread_go(
497 thread_t thread,
498 wait_result_t wresult)
499 {
500 assert(thread->at_safe_point == FALSE);
501 assert(thread->wait_event == NO_EVENT64);
502 assert(thread->wait_queue == WAIT_QUEUE_NULL);
503
504 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
505 if (!thread_unblock(thread, wresult))
506 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
507
508 return (KERN_SUCCESS);
509 }
510
511 return (KERN_NOT_WAITING);
512 }
513
514 /*
515 * Routine: thread_mark_wait_locked
516 * Purpose:
517 * Mark a thread as waiting. If, given the circumstances,
518 * it doesn't want to wait (i.e. already aborted), then
519 * indicate that in the return value.
520 * Conditions:
521 * at splsched() and thread is locked.
522 */
523 __private_extern__
524 wait_result_t
525 thread_mark_wait_locked(
526 thread_t thread,
527 wait_interrupt_t interruptible)
528 {
529 boolean_t at_safe_point;
530
531 /*
532 * The thread may have certain types of interrupts/aborts masked
533 * off. Even if the wait location says these types of interrupts
534 * are OK, we have to honor mask settings (outer-scoped code may
535 * not be able to handle aborts at the moment).
536 */
537 if (interruptible > (thread->options & TH_OPT_INTMASK))
538 interruptible = thread->options & TH_OPT_INTMASK;
539
540 at_safe_point = (interruptible == THREAD_ABORTSAFE);
541
542 if ( interruptible == THREAD_UNINT ||
543 !(thread->state & TH_ABORT) ||
544 (!at_safe_point &&
545 (thread->state & TH_ABORT_SAFELY)) ) {
546 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
547 thread->at_safe_point = at_safe_point;
548 return (thread->wait_result = THREAD_WAITING);
549 }
550 else
551 if (thread->state & TH_ABORT_SAFELY)
552 thread->state &= ~(TH_ABORT|TH_ABORT_SAFELY);
553
554 return (thread->wait_result = THREAD_INTERRUPTED);
555 }
556
557 /*
558 * Routine: thread_interrupt_level
559 * Purpose:
560 * Set the maximum interruptible state for the
561 * current thread. The effective value of any
562 * interruptible flag passed into assert_wait
563 * will never exceed this.
564 *
565 * Useful for code that must not be interrupted,
566 * but which calls code that doesn't know that.
567 * Returns:
568 * The old interrupt level for the thread.
569 */
570 __private_extern__
571 wait_interrupt_t
572 thread_interrupt_level(
573 wait_interrupt_t new_level)
574 {
575 thread_t thread = current_thread();
576 wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
577
578 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
579
580 return result;
581 }
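 
/*
 * Typical usage sketch, per the description above:
 *
 *	wait_interrupt_t save;
 *
 *	save = thread_interrupt_level(THREAD_UNINT);
 *	...call code that may wait but must not be aborted...
 *	thread_interrupt_level(save);
 */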
582
583 /*
584 * Check to see if an assert wait is possible, without actually doing one.
585 * This is used by debug code in locks and elsewhere to verify that it is
586 * always OK to block when trying to take a blocking lock (since waiting
587 * for the actual assert_wait to catch the case may make it hard to detect
588 * this case).
589 */
590 boolean_t
591 assert_wait_possible(void)
592 {
593
594 thread_t thread;
595
596 #if DEBUG
597 if(debug_mode) return TRUE; /* Always succeed in debug mode */
598 #endif
599
600 thread = current_thread();
601
602 return (thread == NULL || wait_queue_assert_possible(thread));
603 }
604
605 /*
606 * assert_wait:
607 *
608 * Assert that the current thread is about to go to
609 * sleep until the specified event occurs.
610 */
611 wait_result_t
612 assert_wait(
613 event_t event,
614 wait_interrupt_t interruptible)
615 {
616 register wait_queue_t wq;
617 register int index;
618
619 assert(event != NO_EVENT);
620
621 index = wait_hash(event);
622 wq = &wait_queues[index];
623 return wait_queue_assert_wait(wq, event, interruptible, 0);
624 }
625
626 wait_result_t
627 assert_wait_timeout(
628 event_t event,
629 wait_interrupt_t interruptible,
630 uint32_t interval,
631 uint32_t scale_factor)
632 {
633 thread_t thread = current_thread();
634 wait_result_t wresult;
635 wait_queue_t wqueue;
636 uint64_t deadline;
637 spl_t s;
638
639 assert(event != NO_EVENT);
640 wqueue = &wait_queues[wait_hash(event)];
641
642 s = splsched();
643 wait_queue_lock(wqueue);
644 thread_lock(thread);
645
646 clock_interval_to_deadline(interval, scale_factor, &deadline);
647 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
648 interruptible, deadline, thread);
649
650 thread_unlock(thread);
651 wait_queue_unlock(wqueue);
652 splx(s);
653
654 return (wresult);
655 }
656
657 wait_result_t
658 assert_wait_deadline(
659 event_t event,
660 wait_interrupt_t interruptible,
661 uint64_t deadline)
662 {
663 thread_t thread = current_thread();
664 wait_result_t wresult;
665 wait_queue_t wqueue;
666 spl_t s;
667
668 assert(event != NO_EVENT);
669 wqueue = &wait_queues[wait_hash(event)];
670
671 s = splsched();
672 wait_queue_lock(wqueue);
673 thread_lock(thread);
674
675 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
676 interruptible, deadline, thread);
677
678 thread_unlock(thread);
679 wait_queue_unlock(wqueue);
680 splx(s);
681
682 return (wresult);
683 }
684
685 /*
686 * thread_sleep_fast_usimple_lock:
687 *
688 * Cause the current thread to wait until the specified event
689 * occurs. The specified simple_lock is unlocked before releasing
690 * the cpu and re-acquired as part of waking up.
691 *
692 * This is the simple lock sleep interface for components that use a
693 * faster version of simple_lock() than is provided by usimple_lock().
694 */
695 __private_extern__ wait_result_t
696 thread_sleep_fast_usimple_lock(
697 event_t event,
698 simple_lock_t lock,
699 wait_interrupt_t interruptible)
700 {
701 wait_result_t res;
702
703 res = assert_wait(event, interruptible);
704 if (res == THREAD_WAITING) {
705 simple_unlock(lock);
706 res = thread_block(THREAD_CONTINUE_NULL);
707 simple_lock(lock);
708 }
709 return res;
710 }
711
712
713 /*
714 * thread_sleep_usimple_lock:
715 *
716 * Cause the current thread to wait until the specified event
717 * occurs. The specified usimple_lock is unlocked before releasing
718 * the cpu and re-acquired as part of waking up.
719 *
720 * This is the simple lock sleep interface for components where
721 * simple_lock() is defined in terms of usimple_lock().
722 */
723 wait_result_t
724 thread_sleep_usimple_lock(
725 event_t event,
726 usimple_lock_t lock,
727 wait_interrupt_t interruptible)
728 {
729 wait_result_t res;
730
731 res = assert_wait(event, interruptible);
732 if (res == THREAD_WAITING) {
733 usimple_unlock(lock);
734 res = thread_block(THREAD_CONTINUE_NULL);
735 usimple_lock(lock);
736 }
737 return res;
738 }
739
740 /*
741 * thread_sleep_mutex:
742 *
743 * Cause the current thread to wait until the specified event
744 * occurs. The specified mutex is unlocked before releasing
745 * the cpu. The mutex will be re-acquired before returning.
746 *
747 * JMM - Add hint to make sure mutex is available before rousting
748 */
749 wait_result_t
750 thread_sleep_mutex(
751 event_t event,
752 mutex_t *mutex,
753 wait_interrupt_t interruptible)
754 {
755 wait_result_t res;
756
757 res = assert_wait(event, interruptible);
758 if (res == THREAD_WAITING) {
759 mutex_unlock(mutex);
760 res = thread_block(THREAD_CONTINUE_NULL);
761 mutex_lock(mutex);
762 }
763 return res;
764 }
765
766 /*
767 * thread_sleep_mutex_deadline:
768 *
769 * Cause the current thread to wait until the specified event
770 * (or deadline) occurs. The specified mutex is unlocked before
771 * releasing the cpu. The mutex will be re-acquired before returning.
772 */
773 wait_result_t
774 thread_sleep_mutex_deadline(
775 event_t event,
776 mutex_t *mutex,
777 uint64_t deadline,
778 wait_interrupt_t interruptible)
779 {
780 wait_result_t res;
781
782 res = assert_wait_deadline(event, interruptible, deadline);
783 if (res == THREAD_WAITING) {
784 mutex_unlock(mutex);
785 res = thread_block(THREAD_CONTINUE_NULL);
786 mutex_lock(mutex);
787 }
788 return res;
789 }
790
791 /*
792 * thread_sleep_lock_write:
793 *
794 * Cause the current thread to wait until the specified event
795 * occurs. The specified (write) lock is unlocked before releasing
796 * the cpu. The (write) lock will be re-acquired before returning.
797 */
798 wait_result_t
799 thread_sleep_lock_write(
800 event_t event,
801 lock_t *lock,
802 wait_interrupt_t interruptible)
803 {
804 wait_result_t res;
805
806 res = assert_wait(event, interruptible);
807 if (res == THREAD_WAITING) {
808 lock_write_done(lock);
809 res = thread_block(THREAD_CONTINUE_NULL);
810 lock_write(lock);
811 }
812 return res;
813 }
814
815 /*
816 * thread_stop:
817 *
818 * Force a preemption point for a thread and wait
819 * for it to stop running. Arbitrates access among
820 * multiple stop requests. (released by unstop)
821 *
822 * The thread must enter a wait state and stop via a
823 * separate means.
824 *
825 * Returns FALSE if interrupted.
826 */
827 boolean_t
828 thread_stop(
829 thread_t thread)
830 {
831 wait_result_t wresult;
832 spl_t s;
833
834 s = splsched();
835 wake_lock(thread);
836
837 while (thread->state & TH_SUSP) {
838 thread->wake_active = TRUE;
839 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
840 wake_unlock(thread);
841 splx(s);
842
843 if (wresult == THREAD_WAITING)
844 wresult = thread_block(THREAD_CONTINUE_NULL);
845
846 if (wresult != THREAD_AWAKENED)
847 return (FALSE);
848
849 s = splsched();
850 wake_lock(thread);
851 }
852
853 thread_lock(thread);
854 thread->state |= TH_SUSP;
855
856 while (thread->state & TH_RUN) {
857 processor_t processor = thread->last_processor;
858
859 if ( processor != PROCESSOR_NULL &&
860 processor->state == PROCESSOR_RUNNING &&
861 processor->active_thread == thread )
862 cause_ast_check(processor);
863 thread_unlock(thread);
864
865 thread->wake_active = TRUE;
866 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
867 wake_unlock(thread);
868 splx(s);
869
870 if (wresult == THREAD_WAITING)
871 wresult = thread_block(THREAD_CONTINUE_NULL);
872
873 if (wresult != THREAD_AWAKENED) {
874 thread_unstop(thread);
875 return (FALSE);
876 }
877
878 s = splsched();
879 wake_lock(thread);
880 thread_lock(thread);
881 }
882
883 thread_unlock(thread);
884 wake_unlock(thread);
885 splx(s);
886
887 return (TRUE);
888 }
889
890 /*
891 * thread_unstop:
892 *
893 * Release a previous stop request and set
894 * the thread running if appropriate.
895 *
896 * Use only after a successful stop operation.
897 */
898 void
899 thread_unstop(
900 thread_t thread)
901 {
902 spl_t s = splsched();
903
904 wake_lock(thread);
905 thread_lock(thread);
906
907 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
908 thread->state &= ~TH_SUSP;
909 thread_unblock(thread, THREAD_AWAKENED);
910
911 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
912 }
913 else
914 if (thread->state & TH_SUSP) {
915 thread->state &= ~TH_SUSP;
916
917 if (thread->wake_active) {
918 thread->wake_active = FALSE;
919 thread_unlock(thread);
920 wake_unlock(thread);
921 splx(s);
922
923 thread_wakeup(&thread->wake_active);
924 return;
925 }
926 }
927
928 thread_unlock(thread);
929 wake_unlock(thread);
930 splx(s);
931 }
932
933 /*
934 * thread_wait:
935 *
936 * Wait for a thread to stop running. (non-interruptible)
937 *
938 */
939 void
940 thread_wait(
941 thread_t thread)
942 {
943 wait_result_t wresult;
944 spl_t s = splsched();
945
946 wake_lock(thread);
947 thread_lock(thread);
948
949 while (thread->state & TH_RUN) {
950 processor_t processor = thread->last_processor;
951
952 if ( processor != PROCESSOR_NULL &&
953 processor->state == PROCESSOR_RUNNING &&
954 processor->active_thread == thread )
955 cause_ast_check(processor);
956 thread_unlock(thread);
957
958 thread->wake_active = TRUE;
959 wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
960 wake_unlock(thread);
961 splx(s);
962
963 if (wresult == THREAD_WAITING)
964 thread_block(THREAD_CONTINUE_NULL);
965
966 s = splsched();
967 wake_lock(thread);
968 thread_lock(thread);
969 }
970
971 thread_unlock(thread);
972 wake_unlock(thread);
973 splx(s);
974 }
975
976 /*
977 * Routine: clear_wait_internal
978 *
979 * Clear the wait condition for the specified thread.
980 * Start the thread executing if that is appropriate.
981 * Arguments:
982 * thread thread to awaken
983 * result Wakeup result the thread should see
984 * Conditions:
985 * At splsched
986 * the thread is locked.
987 * Returns:
988 * KERN_SUCCESS thread was rousted out of a wait
989 * KERN_FAILURE thread was waiting but could not be rousted
990 * KERN_NOT_WAITING thread was not waiting
991 */
992 __private_extern__ kern_return_t
993 clear_wait_internal(
994 thread_t thread,
995 wait_result_t wresult)
996 {
997 wait_queue_t wq = thread->wait_queue;
998 int i = LockTimeOut;
999
1000 do {
1001 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1002 return (KERN_FAILURE);
1003
1004 if (wq != WAIT_QUEUE_NULL) {
1005 if (wait_queue_lock_try(wq)) {
1006 wait_queue_pull_thread_locked(wq, thread, TRUE);
1007 /* wait queue unlocked, thread still locked */
1008 }
1009 else {
1010 thread_unlock(thread);
1011 delay(1);
1012
1013 thread_lock(thread);
1014 if (wq != thread->wait_queue)
1015 return (KERN_NOT_WAITING);
1016
1017 continue;
1018 }
1019 }
1020
1021 return (thread_go(thread, wresult));
1022 } while (--i > 0);
1023
1024 panic("clear_wait_internal: deadlock: thread=0x%x, wq=0x%x, cpu=%d\n",
1025 thread, wq, cpu_number());
1026
1027 return (KERN_FAILURE);
1028 }
1029
1030
1031 /*
1032 * clear_wait:
1033 *
1034 * Clear the wait condition for the specified thread. Start the thread
1035 * executing if that is appropriate.
1036 *
1037 * parameters:
1038 * thread thread to awaken
1039 * result Wakeup result the thread should see
1040 */
1041 kern_return_t
1042 clear_wait(
1043 thread_t thread,
1044 wait_result_t result)
1045 {
1046 kern_return_t ret;
1047 spl_t s;
1048
1049 s = splsched();
1050 thread_lock(thread);
1051 ret = clear_wait_internal(thread, result);
1052 thread_unlock(thread);
1053 splx(s);
1054 return ret;
1055 }
1056
1057
1058 /*
1059 * thread_wakeup_prim:
1060 *
1061 * Common routine for thread_wakeup, thread_wakeup_with_result,
1062 * and thread_wakeup_one.
1063 *
1064 */
1065 kern_return_t
1066 thread_wakeup_prim(
1067 event_t event,
1068 boolean_t one_thread,
1069 wait_result_t result)
1070 {
1071 register wait_queue_t wq;
1072 register int index;
1073
1074 index = wait_hash(event);
1075 wq = &wait_queues[index];
1076 if (one_thread)
1077 return (wait_queue_wakeup_one(wq, event, result));
1078 else
1079 return (wait_queue_wakeup_all(wq, event, result));
1080 }
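 
/*
 * The thread_wakeup(), thread_wakeup_with_result() and
 * thread_wakeup_one() forms used throughout the kernel are macros
 * (see sched_prim.h) over this routine, e.g.
 *
 *	thread_wakeup(x) => thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
 *	thread_wakeup_one(x) => thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
 */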
1081
1082 /*
1083 * thread_bind:
1084 *
1085 * Force a thread to execute on the specified processor.
1086 *
1087 * Returns the previous binding. PROCESSOR_NULL means
1088 * not bound.
1089 *
1090 * XXX - DO NOT export this to users - XXX
1091 */
1092 processor_t
1093 thread_bind(
1094 register thread_t thread,
1095 processor_t processor)
1096 {
1097 processor_t prev;
1098 run_queue_t runq = RUN_QUEUE_NULL;
1099 spl_t s;
1100
1101 s = splsched();
1102 thread_lock(thread);
1103 prev = thread->bound_processor;
1104 if (prev != PROCESSOR_NULL)
1105 runq = run_queue_remove(thread);
1106
1107 thread->bound_processor = processor;
1108
1109 if (runq != RUN_QUEUE_NULL)
1110 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1111 thread_unlock(thread);
1112 splx(s);
1113
1114 return (prev);
1115 }
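 
/*
 * Illustrative sketch: a thread that must run on a particular
 * processor binds itself, then blocks to migrate there (the target
 * processor is hypothetical):
 *
 *	prev = thread_bind(current_thread(), target_processor);
 *	thread_block(THREAD_CONTINUE_NULL);	now on target_processor
 *	...
 *	thread_bind(current_thread(), prev);	restore previous binding
 */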
1116
1117 struct {
1118 uint32_t idle_pset_last,
1119 idle_pset_any,
1120 idle_bound;
1121
1122 uint32_t pset_self,
1123 pset_last,
1124 pset_other,
1125 bound_self,
1126 bound_other;
1127
1128 uint32_t realtime_self,
1129 realtime_last,
1130 realtime_other;
1131
1132 uint32_t missed_realtime,
1133 missed_other;
1134 } dispatch_counts;
1135
1136 /*
1137 * Select a thread for the current processor to run.
1138 *
1139 * May select the current thread, which must be locked.
1140 */
1141 thread_t
1142 thread_select(
1143 register processor_t processor)
1144 {
1145 register thread_t thread;
1146 processor_set_t pset;
1147 boolean_t other_runnable;
1148
1149 /*
1150 * Check for other non-idle runnable threads.
1151 */
1152 pset = processor->processor_set;
1153 thread = processor->active_thread;
1154
1155 /* Update the thread's priority */
1156 if (thread->sched_stamp != sched_tick)
1157 update_priority(thread);
1158
1159 processor->current_pri = thread->sched_pri;
1160
1161 simple_lock(&pset->sched_lock);
1162
1163 other_runnable = processor->runq.count > 0 || pset->runq.count > 0;
1164
1165 if ( thread->state == TH_RUN &&
1166 thread->processor_set == pset &&
1167 (thread->bound_processor == PROCESSOR_NULL ||
1168 thread->bound_processor == processor) ) {
1169 if ( thread->sched_pri >= BASEPRI_RTQUEUES &&
1170 first_timeslice(processor) ) {
1171 if (pset->runq.highq >= BASEPRI_RTQUEUES) {
1172 register run_queue_t runq = &pset->runq;
1173 register queue_t q;
1174
1175 q = runq->queues + runq->highq;
1176 if (((thread_t)q->next)->realtime.deadline <
1177 processor->deadline) {
1178 thread = (thread_t)q->next;
1179 ((queue_entry_t)thread)->next->prev = q;
1180 q->next = ((queue_entry_t)thread)->next;
1181 thread->runq = RUN_QUEUE_NULL;
1182 assert(thread->sched_mode & TH_MODE_PREEMPT);
1183 runq->count--; runq->urgency--;
1184 if (queue_empty(q)) {
1185 if (runq->highq != IDLEPRI)
1186 clrbit(MAXPRI - runq->highq, runq->bitmap);
1187 runq->highq = MAXPRI - ffsbit(runq->bitmap);
1188 }
1189 }
1190 }
1191
1192 processor->deadline = thread->realtime.deadline;
1193
1194 simple_unlock(&pset->sched_lock);
1195
1196 return (thread);
1197 }
1198
1199 if ( (!other_runnable ||
1200 (processor->runq.highq < thread->sched_pri &&
1201 pset->runq.highq < thread->sched_pri)) ) {
1202
1203 /* I am the highest priority runnable (non-idle) thread */
1204
1205 processor->deadline = UINT64_MAX;
1206
1207 simple_unlock(&pset->sched_lock);
1208
1209 return (thread);
1210 }
1211 }
1212
1213 if (other_runnable)
1214 thread = choose_thread(pset, processor);
1215 else {
1216 /*
1217 * Nothing is runnable, so set this processor idle if it
1218 * was running. Return its idle thread.
1219 */
1220 if (processor->state == PROCESSOR_RUNNING) {
1221 remqueue(&pset->active_queue, (queue_entry_t)processor);
1222 processor->state = PROCESSOR_IDLE;
1223
1224 enqueue_tail(&pset->idle_queue, (queue_entry_t)processor);
1225 pset->idle_count++;
1226 }
1227
1228 processor->deadline = UINT64_MAX;
1229
1230 thread = processor->idle_thread;
1231 }
1232
1233 simple_unlock(&pset->sched_lock);
1234
1235 return (thread);
1236 }
1237
1238 /*
1239 * Perform a context switch and start executing the new thread.
1240 *
1241 * Returns FALSE on failure, and the thread is re-dispatched.
1242 *
1243 * Called at splsched.
1244 */
1245
1246 #define funnel_release_check(thread, debug) \
1247 MACRO_BEGIN \
1248 if ((thread)->funnel_state & TH_FN_OWNED) { \
1249 (thread)->funnel_state = TH_FN_REFUNNEL; \
1250 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
1251 (thread)->funnel_lock, (debug), 0, 0, 0); \
1252 funnel_unlock((thread)->funnel_lock); \
1253 } \
1254 MACRO_END
1255
1256 #define funnel_refunnel_check(thread, debug) \
1257 MACRO_BEGIN \
1258 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
1259 kern_return_t result = (thread)->wait_result; \
1260 \
1261 (thread)->funnel_state = 0; \
1262 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
1263 (thread)->funnel_lock, (debug), 0, 0, 0); \
1264 funnel_lock((thread)->funnel_lock); \
1265 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
1266 (thread)->funnel_lock, (debug), 0, 0, 0); \
1267 (thread)->funnel_state = TH_FN_OWNED; \
1268 (thread)->wait_result = result; \
1269 } \
1270 MACRO_END
1271
1272 boolean_t
1273 thread_invoke(
1274 register thread_t old_thread,
1275 register thread_t new_thread,
1276 ast_t reason)
1277 {
1278 thread_continue_t new_cont, continuation = old_thread->continuation;
1279 void *new_param, *parameter = old_thread->parameter;
1280 processor_t processor;
1281 thread_t prev_thread;
1282
1283 if (get_preemption_level() != 0)
1284 panic("thread_invoke: preemption_level %d\n",
1285 get_preemption_level());
1286
1287 assert(old_thread == current_thread());
1288
1289 /*
1290 * Mark thread interruptible.
1291 */
1292 thread_lock(new_thread);
1293 new_thread->state &= ~TH_UNINT;
1294
1295 assert(thread_runnable(new_thread));
1296
1297 /*
1298 * Allow time constraint threads to hang onto
1299 * a stack.
1300 */
1301 if ( (old_thread->sched_mode & TH_MODE_REALTIME) &&
1302 !old_thread->reserved_stack ) {
1303 old_thread->reserved_stack = old_thread->kernel_stack;
1304 }
1305
1306 if (continuation != NULL) {
1307 if (!new_thread->kernel_stack) {
1308 /*
1309 * If the old thread is using a privileged stack,
1310 * check to see whether we can exchange it with
1311 * that of the new thread.
1312 */
1313 if ( old_thread->kernel_stack == old_thread->reserved_stack &&
1314 !new_thread->reserved_stack)
1315 goto need_stack;
1316
1317 /*
1318 * Context switch by performing a stack handoff.
1319 */
1320 new_cont = new_thread->continuation;
1321 new_thread->continuation = NULL;
1322 new_param = new_thread->parameter;
1323 new_thread->parameter = NULL;
1324
1325 processor = current_processor();
1326 processor->active_thread = new_thread;
1327 processor->current_pri = new_thread->sched_pri;
1328 new_thread->last_processor = processor;
1329 ast_context(new_thread);
1330 thread_unlock(new_thread);
1331
1332 current_task()->csw++;
1333
1334 old_thread->reason = reason;
1335
1336 processor->last_dispatch = mach_absolute_time();
1337 timer_event((uint32_t)processor->last_dispatch,
1338 &new_thread->system_timer);
1339
1340 thread_done(old_thread, new_thread, processor);
1341
1342 machine_stack_handoff(old_thread, new_thread);
1343
1344 thread_begin(new_thread, processor);
1345
1346 /*
1347 * Now dispatch the old thread.
1348 */
1349 thread_dispatch(old_thread);
1350
1351 counter_always(c_thread_invoke_hits++);
1352
1353 funnel_refunnel_check(new_thread, 2);
1354 (void) spllo();
1355
1356 assert(new_cont);
1357 call_continuation(new_cont, new_param, new_thread->wait_result);
1358 /*NOTREACHED*/
1359 }
1360 else
1361 if (new_thread == old_thread) {
1362 /* same thread but with continuation */
1363 counter(++c_thread_invoke_same);
1364 thread_unlock(new_thread);
1365
1366 funnel_refunnel_check(new_thread, 3);
1367 (void) spllo();
1368
1369 call_continuation(continuation, parameter, new_thread->wait_result);
1370 /*NOTREACHED*/
1371 }
1372 }
1373 else {
1374 /*
1375 * Check that the new thread has a stack
1376 */
1377 if (!new_thread->kernel_stack) {
1378 need_stack:
1379 if (!stack_alloc_try(new_thread)) {
1380 counter_always(c_thread_invoke_misses++);
1381 thread_unlock(new_thread);
1382 thread_stack_enqueue(new_thread);
1383 return (FALSE);
1384 }
1385 }
1386 else
1387 if (new_thread == old_thread) {
1388 counter(++c_thread_invoke_same);
1389 thread_unlock(new_thread);
1390 return (TRUE);
1391 }
1392 }
1393
1394 /*
1395 * Context switch by full context save.
1396 */
1397 processor = current_processor();
1398 processor->active_thread = new_thread;
1399 processor->current_pri = new_thread->sched_pri;
1400 new_thread->last_processor = processor;
1401 ast_context(new_thread);
1402 assert(thread_runnable(new_thread));
1403 thread_unlock(new_thread);
1404
1405 counter_always(c_thread_invoke_csw++);
1406 current_task()->csw++;
1407
1408 assert(old_thread->runq == RUN_QUEUE_NULL);
1409 old_thread->reason = reason;
1410
1411 processor->last_dispatch = mach_absolute_time();
1412 timer_event((uint32_t)processor->last_dispatch, &new_thread->system_timer);
1413
1414 thread_done(old_thread, new_thread, processor);
1415
1416 /*
1417 * This is where we actually switch register context,
1418 * and address space if required. Control will not
1419 * return here immediately.
1420 */
1421 prev_thread = machine_switch_context(old_thread, continuation, new_thread);
1422
1423 /*
1424 * We are still old_thread, possibly on a different processor,
1425 * and new_thread is now stale.
1426 */
1427 thread_begin(old_thread, old_thread->last_processor);
1428
1429 /*
1430 * Now dispatch the thread which resumed us.
1431 */
1432 thread_dispatch(prev_thread);
1433
1434 if (continuation) {
1435 funnel_refunnel_check(old_thread, 3);
1436 (void) spllo();
1437
1438 call_continuation(continuation, parameter, old_thread->wait_result);
1439 /*NOTREACHED*/
1440 }
1441
1442 return (TRUE);
1443 }
1444
1445 /*
1446 * thread_done:
1447 *
1448 * Perform calculations for thread
1449 * finishing execution on the current processor.
1450 *
1451 * Called at splsched.
1452 */
1453 void
1454 thread_done(
1455 thread_t old_thread,
1456 thread_t new_thread,
1457 processor_t processor)
1458 {
1459 if (!(old_thread->state & TH_IDLE)) {
1460 /*
1461 * Compute remainder of current quantum.
1462 */
1463 if ( first_timeslice(processor) &&
1464 processor->quantum_end > processor->last_dispatch )
1465 old_thread->current_quantum =
1466 (processor->quantum_end - processor->last_dispatch);
1467 else
1468 old_thread->current_quantum = 0;
1469
1470 if (old_thread->sched_mode & TH_MODE_REALTIME) {
1471 /*
1472 * Cancel the deadline if the thread has
1473 * consumed the entire quantum.
1474 */
1475 if (old_thread->current_quantum == 0) {
1476 old_thread->realtime.deadline = UINT64_MAX;
1477 old_thread->reason |= AST_QUANTUM;
1478 }
1479 }
1480 else {
1481 /*
1482 * For non-realtime threads treat a tiny
1483 * remaining quantum as an expired quantum
1484 * but include what's left next time.
1485 */
1486 if (old_thread->current_quantum < min_std_quantum) {
1487 old_thread->reason |= AST_QUANTUM;
1488 old_thread->current_quantum += std_quantum;
1489 }
1490 }
1491
1492 /*
1493 * If we are doing a direct handoff then
1494 * give the remainder of our quantum to
1495 * the next thread.
1496 */
1497 if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
1498 new_thread->current_quantum = old_thread->current_quantum;
1499 old_thread->reason |= AST_QUANTUM;
1500 old_thread->current_quantum = 0;
1501 }
1502
1503 old_thread->last_switch = processor->last_dispatch;
1504
1505 old_thread->computation_metered +=
1506 (old_thread->last_switch - old_thread->computation_epoch);
1507 }
1508 }
1509
1510 /*
1511 * thread_begin:
1512 *
1513 * Set up for thread beginning execution on
1514 * the current processor.
1515 *
1516 * Called at splsched.
1517 */
1518 void
1519 thread_begin(
1520 thread_t thread,
1521 processor_t processor)
1522 {
1523 if (!(thread->state & TH_IDLE)) {
1524 /*
1525 * Give the thread a new quantum
1526 * if none remaining.
1527 */
1528 if (thread->current_quantum == 0)
1529 thread_quantum_init(thread);
1530
1531 /*
1532 * Set up quantum timer and timeslice.
1533 */
1534 processor->quantum_end =
1535 (processor->last_dispatch + thread->current_quantum);
1536 timer_call_enter1(&processor->quantum_timer,
1537 thread, processor->quantum_end);
1538
1539 processor_timeslice_setup(processor, thread);
1540
1541 thread->last_switch = processor->last_dispatch;
1542
1543 thread->computation_epoch = thread->last_switch;
1544 }
1545 else {
1546 timer_call_cancel(&processor->quantum_timer);
1547 processor->timeslice = 1;
1548 }
1549 }
1550
1551 /*
1552 * thread_dispatch:
1553 *
1554 * Handle previous thread at context switch. Re-dispatch
1555 * if still running, otherwise update run state and perform
1556 * special actions.
1557 *
1558 * Called at splsched.
1559 */
1560 void
1561 thread_dispatch(
1562 register thread_t thread)
1563 {
1564 /*
1565 * If blocked at a continuation, discard
1566 * the stack.
1567 */
1568 #ifndef i386
1569 if (thread->continuation != NULL && thread->kernel_stack)
1570 stack_free(thread);
1571 #endif
1572
1573 if (!(thread->state & TH_IDLE)) {
1574 wake_lock(thread);
1575 thread_lock(thread);
1576
1577 if (!(thread->state & TH_WAIT)) {
1578 /*
1579 * Still running.
1580 */
1581 if (thread->reason & AST_QUANTUM)
1582 thread_setrun(thread, SCHED_TAILQ);
1583 else
1584 if (thread->reason & AST_PREEMPT)
1585 thread_setrun(thread, SCHED_HEADQ);
1586 else
1587 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1588
1589 thread->reason = AST_NONE;
1590
1591 thread_unlock(thread);
1592 wake_unlock(thread);
1593 }
1594 else {
1595 boolean_t wake;
1596
1597 /*
1598 * Waiting.
1599 */
1600 thread->state &= ~TH_RUN;
1601
1602 wake = thread->wake_active;
1603 thread->wake_active = FALSE;
1604
1605 if (thread->sched_mode & TH_MODE_TIMESHARE)
1606 pset_share_decr(thread->processor_set);
1607 pset_run_decr(thread->processor_set);
1608
1609 thread_unlock(thread);
1610 wake_unlock(thread);
1611
1612 if (thread->options & TH_OPT_CALLOUT)
1613 call_thread_block();
1614
1615 if (wake)
1616 thread_wakeup((event_t)&thread->wake_active);
1617
1618 if (thread->state & TH_TERMINATE)
1619 thread_terminate_enqueue(thread);
1620 }
1621 }
1622 }
1623
1624 /*
1625 * thread_block_reason:
1626 *
1627 * Forces a reschedule, blocking the caller if a wait
1628 * has been asserted.
1629 *
1630 * If a continuation is specified, then thread_invoke will
1631 * attempt to discard the thread's kernel stack. When the
1632 * thread resumes, it will execute the continuation function
1633 * on a new kernel stack.
1634 */
1635 counter(mach_counter_t c_thread_block_calls = 0;)
1636
1637 wait_result_t
1638 thread_block_reason(
1639 thread_continue_t continuation,
1640 void *parameter,
1641 ast_t reason)
1642 {
1643 register thread_t self = current_thread();
1644 register processor_t processor;
1645 register thread_t new_thread;
1646 spl_t s;
1647
1648 counter(++c_thread_block_calls);
1649
1650 s = splsched();
1651
1652 if (!(reason & AST_PREEMPT))
1653 funnel_release_check(self, 2);
1654
1655 processor = current_processor();
1656
1657 /*
1658 * Delay switching to the idle thread under certain conditions.
1659 */
1660 if (s != FALSE && (self->state & (TH_IDLE|TH_TERMINATE|TH_WAIT)) == TH_WAIT) {
1661 if ( processor->processor_set->processor_count > 1 &&
1662 processor->processor_set->runq.count == 0 &&
1663 processor->runq.count == 0 )
1664 processor = delay_idle(processor, self);
1665 }
1666
1667 /* If we're explicitly yielding, force a subsequent quantum */
1668 if (reason & AST_YIELD)
1669 processor->timeslice = 0;
1670
1671 /* We're handling all scheduling AST's */
1672 ast_off(AST_SCHEDULING);
1673
1674 self->continuation = continuation;
1675 self->parameter = parameter;
1676
1677 thread_lock(self);
1678 new_thread = thread_select(processor);
1679 assert(new_thread && thread_runnable(new_thread));
1680 thread_unlock(self);
1681 while (!thread_invoke(self, new_thread, reason)) {
1682 thread_lock(self);
1683 new_thread = thread_select(processor);
1684 assert(new_thread && thread_runnable(new_thread));
1685 thread_unlock(self);
1686 }
1687
1688 funnel_refunnel_check(self, 5);
1689 splx(s);
1690
1691 return (self->wait_result);
1692 }
1693
1694 /*
1695 * thread_block:
1696 *
1697 * Block the current thread if a wait has been asserted.
1698 */
1699 wait_result_t
1700 thread_block(
1701 thread_continue_t continuation)
1702 {
1703 return thread_block_reason(continuation, NULL, AST_NONE);
1704 }
1705
1706 wait_result_t
1707 thread_block_parameter(
1708 thread_continue_t continuation,
1709 void *parameter)
1710 {
1711 return thread_block_reason(continuation, parameter, AST_NONE);
1712 }
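 
/*
 * Illustrative continuation sketch (the function, event and object
 * names are hypothetical).  Blocking with a continuation lets the
 * kernel stack be discarded; when the wait is satisfied the thread
 * resumes in the continuation on a fresh stack rather than returning
 * from thread_block():
 *
 *	static void example_continue(void *param, wait_result_t wresult);
 *
 *	assert_wait(event, THREAD_UNINT);
 *	thread_block_parameter(example_continue, object);
 *	control does not return here
 *
 *	static void
 *	example_continue(void *param, wait_result_t wresult)
 *	{
 *		finish the operation; must not return
 *	}
 */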
1713
1714 /*
1715 * thread_run:
1716 *
1717 * Switch directly from the current thread to the
1718 * new thread, handing off our quantum if appropriate.
1719 *
1720 * New thread must be runnable, and not on a run queue.
1721 *
1722 * Called at splsched.
1723 */
1724 int
1725 thread_run(
1726 thread_t self,
1727 thread_continue_t continuation,
1728 void *parameter,
1729 thread_t new_thread)
1730 {
1731 ast_t handoff = AST_HANDOFF;
1732
1733 funnel_release_check(self, 3);
1734
1735 self->continuation = continuation;
1736 self->parameter = parameter;
1737
1738 while (!thread_invoke(self, new_thread, handoff)) {
1739 register processor_t processor = current_processor();
1740
1741 thread_lock(self);
1742 new_thread = thread_select(processor);
1743 thread_unlock(self);
1744 handoff = AST_NONE;
1745 }
1746
1747 funnel_refunnel_check(self, 6);
1748
1749 return (self->wait_result);
1750 }
1751
1752 /*
1753 * thread_continue:
1754 *
1755 * Called at splsched when a thread first receives
1756 * a new stack after a continuation.
1757 */
1758 void
1759 thread_continue(
1760 register thread_t old_thread)
1761 {
1762 register thread_t self = current_thread();
1763 register thread_continue_t continuation;
1764 register void *parameter;
1765
1766 continuation = self->continuation;
1767 self->continuation = NULL;
1768 parameter = self->parameter;
1769 self->parameter = NULL;
1770
1771 thread_begin(self, self->last_processor);
1772
1773 if (old_thread != THREAD_NULL)
1774 thread_dispatch(old_thread);
1775
1776 funnel_refunnel_check(self, 4);
1777
1778 if (old_thread != THREAD_NULL)
1779 (void)spllo();
1780
1781 call_continuation(continuation, parameter, self->wait_result);
1782 /*NOTREACHED*/
1783 }
1784
1785 /*
1786 * Enqueue thread on run queue. Thread must be locked,
1787 * and not already be on a run queue. Returns TRUE
1788 * if a preemption is indicated based on the state
1789 * of the run queue.
1790 *
1791 * Run queue must be locked, see run_queue_remove()
1792 * for more info.
1793 */
1794 static boolean_t
1795 run_queue_enqueue(
1796 register run_queue_t rq,
1797 register thread_t thread,
1798 integer_t options)
1799 {
1800 register int whichq = thread->sched_pri;
1801 register queue_t queue = &rq->queues[whichq];
1802 boolean_t result = FALSE;
1803
1804 assert(whichq >= MINPRI && whichq <= MAXPRI);
1805
1806 assert(thread->runq == RUN_QUEUE_NULL);
1807 if (queue_empty(queue)) {
1808 enqueue_tail(queue, (queue_entry_t)thread);
1809
1810 setbit(MAXPRI - whichq, rq->bitmap);
1811 if (whichq > rq->highq) {
1812 rq->highq = whichq;
1813 result = TRUE;
1814 }
1815 }
1816 else
1817 if (options & SCHED_HEADQ)
1818 enqueue_head(queue, (queue_entry_t)thread);
1819 else
1820 enqueue_tail(queue, (queue_entry_t)thread);
1821
1822 thread->runq = rq;
1823 if (thread->sched_mode & TH_MODE_PREEMPT)
1824 rq->urgency++;
1825 rq->count++;
1826
1827 return (result);
1828 }
1829
1830 /*
1831 * Enqueue a thread for realtime execution, similar
1832 * to above. Handles preemption directly.
1833 */
1834 static void
1835 realtime_schedule_insert(
1836 register processor_set_t pset,
1837 register thread_t thread)
1838 {
1839 register run_queue_t rq = &pset->runq;
1840 register int whichq = thread->sched_pri;
1841 register queue_t queue = &rq->queues[whichq];
1842 uint64_t deadline = thread->realtime.deadline;
1843 boolean_t try_preempt = FALSE;
1844
1845 assert(whichq >= BASEPRI_REALTIME && whichq <= MAXPRI);
1846
1847 assert(thread->runq == RUN_QUEUE_NULL);
1848 if (queue_empty(queue)) {
1849 enqueue_tail(queue, (queue_entry_t)thread);
1850
1851 setbit(MAXPRI - whichq, rq->bitmap);
1852 if (whichq > rq->highq)
1853 rq->highq = whichq;
1854 try_preempt = TRUE;
1855 }
1856 else {
1857 register thread_t entry = (thread_t)queue_first(queue);
1858
1859 while (TRUE) {
1860 if ( queue_end(queue, (queue_entry_t)entry) ||
1861 deadline < entry->realtime.deadline ) {
1862 entry = (thread_t)queue_prev((queue_entry_t)entry);
1863 break;
1864 }
1865
1866 entry = (thread_t)queue_next((queue_entry_t)entry);
1867 }
1868
1869 if ((queue_entry_t)entry == queue)
1870 try_preempt = TRUE;
1871
1872 insque((queue_entry_t)thread, (queue_entry_t)entry);
1873 }
1874
1875 thread->runq = rq;
1876 assert(thread->sched_mode & TH_MODE_PREEMPT);
1877 rq->count++; rq->urgency++;
1878
1879 if (try_preempt) {
1880 register processor_t processor;
1881
1882 processor = current_processor();
1883 if ( pset == processor->processor_set &&
1884 (thread->sched_pri > processor->current_pri ||
1885 deadline < processor->deadline ) ) {
1886 dispatch_counts.realtime_self++;
1887 simple_unlock(&pset->sched_lock);
1888
1889 ast_on(AST_PREEMPT | AST_URGENT);
1890 return;
1891 }
1892
1893 if ( pset->processor_count > 1 ||
1894 pset != processor->processor_set ) {
1895 processor_t myprocessor, lastprocessor;
1896 queue_entry_t next;
1897
1898 myprocessor = processor;
1899 processor = thread->last_processor;
1900 if ( processor != myprocessor &&
1901 processor != PROCESSOR_NULL &&
1902 processor->processor_set == pset &&
1903 processor->state == PROCESSOR_RUNNING &&
1904 (thread->sched_pri > processor->current_pri ||
1905 deadline < processor->deadline ) ) {
1906 dispatch_counts.realtime_last++;
1907 cause_ast_check(processor);
1908 simple_unlock(&pset->sched_lock);
1909 return;
1910 }
1911
1912 lastprocessor = processor;
1913 queue = &pset->active_queue;
1914 processor = (processor_t)queue_first(queue);
1915 while (!queue_end(queue, (queue_entry_t)processor)) {
1916 next = queue_next((queue_entry_t)processor);
1917
1918 if ( processor != myprocessor &&
1919 processor != lastprocessor &&
1920 (thread->sched_pri > processor->current_pri ||
1921 deadline < processor->deadline ) ) {
1922 if (!queue_end(queue, next)) {
1923 remqueue(queue, (queue_entry_t)processor);
1924 enqueue_tail(queue, (queue_entry_t)processor);
1925 }
1926 dispatch_counts.realtime_other++;
1927 cause_ast_check(processor);
1928 simple_unlock(&pset->sched_lock);
1929 return;
1930 }
1931
1932 processor = (processor_t)next;
1933 }
1934 }
1935 }
1936
1937 simple_unlock(&pset->sched_lock);
1938 }
1939
1940 /*
1941 * thread_setrun:
1942 *
1943 * Dispatch thread for execution, directly onto an idle
1944 * processor if possible. Else put on appropriate run
1945 * queue. (local if bound, else processor set)
1946 *
1947 * Thread must be locked.
1948 */
1949 void
1950 thread_setrun(
1951 register thread_t new_thread,
1952 integer_t options)
1953 {
1954 register processor_t processor;
1955 register processor_set_t pset;
1956 register thread_t thread;
1957 ast_t preempt = (options & SCHED_PREEMPT)?
1958 AST_PREEMPT: AST_NONE;
1959
1960 assert(thread_runnable(new_thread));
1961
1962 /*
1963 * Update priority if needed.
1964 */
1965 if (new_thread->sched_stamp != sched_tick)
1966 update_priority(new_thread);
1967
1968 /*
1969 * Check for urgent preemption.
1970 */
1971 if (new_thread->sched_mode & TH_MODE_PREEMPT)
1972 preempt = (AST_PREEMPT | AST_URGENT);
1973
1974 assert(new_thread->runq == RUN_QUEUE_NULL);
1975
1976 if ((processor = new_thread->bound_processor) == PROCESSOR_NULL) {
1977 /*
1978 * First try to dispatch on
1979 * the last processor.
1980 */
1981 pset = new_thread->processor_set;
1982 processor = new_thread->last_processor;
1983 if ( pset->processor_count > 1 &&
1984 processor != PROCESSOR_NULL &&
1985 processor->state == PROCESSOR_IDLE ) {
1986 processor_lock(processor);
1987 simple_lock(&pset->sched_lock);
1988 if ( processor->processor_set == pset &&
1989 processor->state == PROCESSOR_IDLE ) {
1990 remqueue(&pset->idle_queue, (queue_entry_t)processor);
1991 pset->idle_count--;
1992 processor->next_thread = new_thread;
1993 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
1994 processor->deadline = new_thread->realtime.deadline;
1995 else
1996 processor->deadline = UINT64_MAX;
1997 processor->state = PROCESSOR_DISPATCHING;
1998 dispatch_counts.idle_pset_last++;
1999 simple_unlock(&pset->sched_lock);
2000 processor_unlock(processor);
2001 if (processor != current_processor())
2002 machine_signal_idle(processor);
2003 return;
2004 }
2005 processor_unlock(processor);
2006 }
2007 else
2008 simple_lock(&pset->sched_lock);
2009
2010 /*
2011 * Next pick any idle processor
2012 * in the processor set.
2013 */
2014 if (pset->idle_count > 0) {
2015 processor = (processor_t)dequeue_head(&pset->idle_queue);
2016 pset->idle_count--;
2017 processor->next_thread = new_thread;
2018 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2019 processor->deadline = new_thread->realtime.deadline;
2020 else
2021 processor->deadline = UINT64_MAX;
2022 processor->state = PROCESSOR_DISPATCHING;
2023 dispatch_counts.idle_pset_any++;
2024 simple_unlock(&pset->sched_lock);
2025 if (processor != current_processor())
2026 machine_signal_idle(processor);
2027 return;
2028 }
2029
2030 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2031 realtime_schedule_insert(pset, new_thread);
2032 else {
2033 if (!run_queue_enqueue(&pset->runq, new_thread, options))
2034 preempt = AST_NONE;
2035
2036 /*
2037 * Update the timesharing quanta.
2038 */
2039 timeshare_quanta_update(pset);
2040
2041 /*
2042 * Preempt check.
2043 */
2044 if (preempt != AST_NONE) {
2045 /*
2046 * First try the current processor
2047 * if it is a member of the correct
2048 * processor set.
2049 */
2050 processor = current_processor();
2051 thread = processor->active_thread;
2052 if ( pset == processor->processor_set &&
2053 csw_needed(thread, processor) ) {
2054 dispatch_counts.pset_self++;
2055 simple_unlock(&pset->sched_lock);
2056
2057 ast_on(preempt);
2058 return;
2059 }
2060
2061 /*
2062 * If that failed and we have other
2063 * processors available keep trying.
2064 */
2065 if ( pset->processor_count > 1 ||
2066 pset != processor->processor_set ) {
2067 queue_t queue = &pset->active_queue;
2068 processor_t myprocessor, lastprocessor;
2069 queue_entry_t next;
2070
2071 /*
2072 * Next try the last processor
2073 * dispatched on.
2074 */
2075 myprocessor = processor;
2076 processor = new_thread->last_processor;
2077 if ( processor != myprocessor &&
2078 processor != PROCESSOR_NULL &&
2079 processor->processor_set == pset &&
2080 processor->state == PROCESSOR_RUNNING &&
2081 new_thread->sched_pri > processor->current_pri ) {
2082 dispatch_counts.pset_last++;
2083 cause_ast_check(processor);
2084 simple_unlock(&pset->sched_lock);
2085 return;
2086 }
2087
2088 /*
2089 * Lastly, pick any other
2090 * available processor.
2091 */
2092 lastprocessor = processor;
2093 processor = (processor_t)queue_first(queue);
2094 while (!queue_end(queue, (queue_entry_t)processor)) {
2095 next = queue_next((queue_entry_t)processor);
2096
2097 if ( processor != myprocessor &&
2098 processor != lastprocessor &&
2099 new_thread->sched_pri >
2100 processor->current_pri ) {
2101 if (!queue_end(queue, next)) {
2102 remqueue(queue, (queue_entry_t)processor);
2103 enqueue_tail(queue, (queue_entry_t)processor);
2104 }
2105 dispatch_counts.pset_other++;
2106 cause_ast_check(processor);
2107 simple_unlock(&pset->sched_lock);
2108 return;
2109 }
2110
2111 processor = (processor_t)next;
2112 }
2113 }
2114 }
2115
2116 simple_unlock(&pset->sched_lock);
2117 }
2118 }
2119 else {
2120 /*
2121 * Bound, can only run on bound processor. Have to lock
2122 * processor here because it may not be the current one.
2123 */
2124 processor_lock(processor);
2125 pset = processor->processor_set;
2126 if (pset != PROCESSOR_SET_NULL) {
2127 simple_lock(&pset->sched_lock);
2128 if (processor->state == PROCESSOR_IDLE) {
2129 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2130 pset->idle_count--;
2131 processor->next_thread = new_thread;
2132 processor->deadline = UINT64_MAX;
2133 processor->state = PROCESSOR_DISPATCHING;
2134 dispatch_counts.idle_bound++;
2135 simple_unlock(&pset->sched_lock);
2136 processor_unlock(processor);
2137 if (processor != current_processor())
2138 machine_signal_idle(processor);
2139 return;
2140 }
2141 }
2142
2143 if (!run_queue_enqueue(&processor->runq, new_thread, options))
2144 preempt = AST_NONE;
2145
2146 if (preempt != AST_NONE) {
2147 if (processor == current_processor()) {
2148 thread = processor->active_thread;
2149 if (csw_needed(thread, processor)) {
2150 dispatch_counts.bound_self++;
2151 ast_on(preempt);
2152 }
2153 }
2154 else
2155 if ( processor->state == PROCESSOR_RUNNING &&
2156 new_thread->sched_pri > processor->current_pri ) {
2157 dispatch_counts.bound_other++;
2158 cause_ast_check(processor);
2159 }
2160 }
2161
2162 if (pset != PROCESSOR_SET_NULL)
2163 simple_unlock(&pset->sched_lock);
2164
2165 processor_unlock(processor);
2166 }
2167 }
2168
2169 /*
2170 * Check for a possible preemption point in
2171 * the (current) thread.
2172 *
2173 * Called at splsched.
2174 */
2175 ast_t
2176 csw_check(
2177 thread_t thread,
2178 processor_t processor)
2179 {
2180 int current_pri = thread->sched_pri;
2181 ast_t result = AST_NONE;
2182 run_queue_t runq;
2183
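/*
 * Within the first timeslice, preempt only for a strictly
 * higher-priority thread (or any realtime thread); once the
 * quantum has expired, an equal-priority thread suffices.
 */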
2184 if (first_timeslice(processor)) {
2185 runq = &processor->processor_set->runq;
2186 if (runq->highq >= BASEPRI_RTQUEUES)
2187 return (AST_PREEMPT | AST_URGENT);
2188
2189 if (runq->highq > current_pri) {
2190 if (runq->urgency > 0)
2191 return (AST_PREEMPT | AST_URGENT);
2192
2193 result |= AST_PREEMPT;
2194 }
2195
2196 runq = &processor->runq;
2197 if (runq->highq > current_pri) {
2198 if (runq->urgency > 0)
2199 return (AST_PREEMPT | AST_URGENT);
2200
2201 result |= AST_PREEMPT;
2202 }
2203 }
2204 else {
2205 runq = &processor->processor_set->runq;
2206 if (runq->highq >= current_pri) {
2207 if (runq->urgency > 0)
2208 return (AST_PREEMPT | AST_URGENT);
2209
2210 result |= AST_PREEMPT;
2211 }
2212
2213 runq = &processor->runq;
2214 if (runq->highq >= current_pri) {
2215 if (runq->urgency > 0)
2216 return (AST_PREEMPT | AST_URGENT);
2217
2218 result |= AST_PREEMPT;
2219 }
2220 }
2221
2222 if (result != AST_NONE)
2223 return (result);
2224
2225 if (thread->state & TH_SUSP)
2226 result |= AST_PREEMPT;
2227
2228 return (result);
2229 }
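/*
 * Usage sketch: callers run at splsched and post any returned
 * AST on the current processor, as set_sched_pri() does below:
 *
 *	ast_t preempt = csw_check(thread, current_processor());
 *	if (preempt != AST_NONE)
 *		ast_on(preempt);
 */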
2230
2231 /*
2232 * set_sched_pri:
2233 *
2234 * Set the scheduled priority of the specified thread.
2235 *
2236 * This may cause the thread to change queues.
2237 *
2238 * Thread must be locked.
2239 */
2240 void
2241 set_sched_pri(
2242 thread_t thread,
2243 int priority)
2244 {
2245 register struct run_queue *rq = run_queue_remove(thread);
2246
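/*
 * A fixed-priority thread counts as urgent (TH_MODE_PREEMPT)
 * when its new priority is at or above BASEPRI_PREEMPT, or when
 * a user task in the [BASEPRI_BACKGROUND, MINPRI_KERNEL) range
 * has the thread elevated above its task priority.
 */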
2247 if ( !(thread->sched_mode & TH_MODE_TIMESHARE) &&
2248 (priority >= BASEPRI_PREEMPT ||
2249 (thread->task_priority < MINPRI_KERNEL &&
2250 thread->task_priority >= BASEPRI_BACKGROUND &&
2251 priority > thread->task_priority) ) )
2252 thread->sched_mode |= TH_MODE_PREEMPT;
2253 else
2254 thread->sched_mode &= ~TH_MODE_PREEMPT;
2255
2256 thread->sched_pri = priority;
2257 if (rq != RUN_QUEUE_NULL)
2258 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2259 else
2260 if (thread->state & TH_RUN) {
2261 processor_t processor = thread->last_processor;
2262
2263 if (thread == current_thread()) {
2264 ast_t preempt = csw_check(thread, processor);
2265
2266 if (preempt != AST_NONE)
2267 ast_on(preempt);
2268 processor->current_pri = priority;
2269 }
2270 else
2271 if ( processor != PROCESSOR_NULL &&
2272 processor->active_thread == thread )
2273 cause_ast_check(processor);
2274 }
2275 }
2276
2277 #if 0
2278
2279 static void
2280 run_queue_check(
2281 run_queue_t rq,
2282 thread_t thread)
2283 {
2284 queue_t q;
2285 queue_entry_t qe;
2286
2287 if (rq != thread->runq)
2288 panic("run_queue_check: thread runq");
2289
2290 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
2291 panic("run_queue_check: thread sched_pri");
2292
2293 q = &rq->queues[thread->sched_pri];
2294 qe = queue_first(q);
2295 while (!queue_end(q, qe)) {
2296 if (qe == (queue_entry_t)thread)
2297 return;
2298
2299 qe = queue_next(qe);
2300 }
2301
2302 panic("run_queue_check: end");
2303 }
2304
2305 #endif /* 0 */
2306
2307 /*
2308 * run_queue_remove:
2309 *
2310 * Remove a thread from its current run queue and
2311 * return the run queue if successful.
2312 *
2313 * Thread must be locked.
2314 */
2315 run_queue_t
2316 run_queue_remove(
2317 thread_t thread)
2318 {
2319 register run_queue_t rq = thread->runq;
2320
2321 /*
2322 * If rq is RUN_QUEUE_NULL, the thread will stay out of the
2323 * run queues because the caller locked the thread. Otherwise
2324 * the thread is on a run queue, but could be chosen for dispatch
2325 * and removed.
2326 */
2327 if (rq != RUN_QUEUE_NULL) {
2328 processor_set_t pset = thread->processor_set;
2329 processor_t processor = thread->bound_processor;
2330
2331 /*
2332 * The run queues are locked by the pset scheduling
2333 * lock, except that when a processor is off-line its
2334 * local run queue is protected by the processor lock.
2335 */
2336 if (processor != PROCESSOR_NULL) {
2337 processor_lock(processor);
2338 pset = processor->processor_set;
2339 }
2340
2341 if (pset != PROCESSOR_SET_NULL)
2342 simple_lock(&pset->sched_lock);
2343
2344 if (rq == thread->runq) {
2345 /*
2346 * Thread is on a run queue and we have a lock on
2347 * that run queue.
2348 */
2349 remqueue(&rq->queues[0], (queue_entry_t)thread);
2350 rq->count--;
2351 if (thread->sched_mode & TH_MODE_PREEMPT)
2352 rq->urgency--;
2353 assert(rq->urgency >= 0);
2354
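/*
 * The run queue bitmap has one bit per priority, indexed
 * as MAXPRI - pri, so ffsbit() yields the highest occupied
 * priority for the new highq.
 */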
2355 if (queue_empty(rq->queues + thread->sched_pri)) {
2356 /* update run queue status */
2357 if (thread->sched_pri != IDLEPRI)
2358 clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
2359 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2360 }
2361
2362 thread->runq = RUN_QUEUE_NULL;
2363 }
2364 else {
2365 /*
2366 * The thread left the run queue before we could
2367 * lock the run queue.
2368 */
2369 assert(thread->runq == RUN_QUEUE_NULL);
2370 rq = RUN_QUEUE_NULL;
2371 }
2372
2373 if (pset != PROCESSOR_SET_NULL)
2374 simple_unlock(&pset->sched_lock);
2375
2376 if (processor != PROCESSOR_NULL)
2377 processor_unlock(processor);
2378 }
2379
2380 return (rq);
2381 }
2382
2383 /*
2384 * choose_thread:
2385 *
2386 * Choose a thread to execute, remove it from the
2387 * run queues and return it.
2388 *
2389 * Called with pset scheduling lock held.
2390 */
2391 static thread_t
2392 choose_thread(
2393 processor_set_t pset,
2394 processor_t processor)
2395 {
2396 register run_queue_t runq;
2397 register thread_t thread;
2398 register queue_t q;
2399
2400 runq = &processor->runq;
2401
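/*
 * Prefer the processor-local run queue whenever its highest
 * priority is at least that of the shared pset run queue.
 */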
2402 if (runq->count > 0 && runq->highq >= pset->runq.highq) {
2403 q = runq->queues + runq->highq;
2404
2405 thread = (thread_t)q->next;
2406 ((queue_entry_t)thread)->next->prev = q;
2407 q->next = ((queue_entry_t)thread)->next;
2408 thread->runq = RUN_QUEUE_NULL;
2409 runq->count--;
2410 if (thread->sched_mode & TH_MODE_PREEMPT)
2411 runq->urgency--;
2412 assert(runq->urgency >= 0);
2413 if (queue_empty(q)) {
2414 if (runq->highq != IDLEPRI)
2415 clrbit(MAXPRI - runq->highq, runq->bitmap);
2416 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2417 }
2418
2419 processor->deadline = UINT64_MAX;
2420
2421 return (thread);
2422 }
2423
2424 runq = &pset->runq;
2425
2426 assert(runq->count > 0);
2427 q = runq->queues + runq->highq;
2428
2429 thread = (thread_t)q->next;
2430 ((queue_entry_t)thread)->next->prev = q;
2431 q->next = ((queue_entry_t)thread)->next;
2432 thread->runq = RUN_QUEUE_NULL;
2433 runq->count--;
2434 if (runq->highq >= BASEPRI_RTQUEUES)
2435 processor->deadline = thread->realtime.deadline;
2436 else
2437 processor->deadline = UINT64_MAX;
2438 if (thread->sched_mode & TH_MODE_PREEMPT)
2439 runq->urgency--;
2440 assert(runq->urgency >= 0);
2441 if (queue_empty(q)) {
2442 if (runq->highq != IDLEPRI)
2443 clrbit(MAXPRI - runq->highq, runq->bitmap);
2444 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2445 }
2446
2447 timeshare_quanta_update(pset);
2448
2449 return (thread);
2450 }
2451
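/*
 * delay_idle:
 *
 * Spin briefly before committing to the idle thread, watching the
 * local and global run-queue counts for work while the current
 * thread remains waiting.  After delay_idle_spin has elapsed,
 * periodically drop spl so pending interrupts can be taken.  Gives
 * up after delay_idle_limit and returns the (possibly different)
 * current processor.
 */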
2452 static processor_t
2453 delay_idle(
2454 processor_t processor,
2455 thread_t self)
2456 {
2457 int *gcount, *lcount;
2458 uint64_t abstime, spin, limit;
2459
2460 lcount = &processor->runq.count;
2461 gcount = &processor->processor_set->runq.count;
2462
2463 abstime = mach_absolute_time();
2464 limit = abstime + delay_idle_limit;
2465 spin = abstime + delay_idle_spin;
2466
2467 timer_event((uint32_t)abstime, &processor->idle_thread->system_timer);
2468
2469 self->options |= TH_OPT_DELAYIDLE;
2470
2471 while ( *gcount == 0 && *lcount == 0 &&
2472 (self->state & TH_WAIT) != 0 &&
2473 abstime < limit ) {
2474 if (abstime >= spin) {
2475 (void)spllo();
2476
2477 (void)splsched();
2478 processor = current_processor();
2479 lcount = &processor->runq.count;
2480 gcount = &processor->processor_set->runq.count;
2481
2482 abstime = mach_absolute_time();
2483 spin = abstime + delay_idle_spin;
2484
2485 timer_event((uint32_t)abstime, &processor->idle_thread->system_timer);
2486 }
2487 else
2488 abstime = mach_absolute_time();
2489 }
2490
2491 timer_event((uint32_t)abstime, &self->system_timer);
2492
2493 self->options &= ~TH_OPT_DELAYIDLE;
2494
2495 return (processor);
2496 }
2497
2498 /*
2499 * no_dispatch_count counts the number of times a processor goes non-idle
2500 * without being dispatched. This should be very rare.
2501 */
2502 int no_dispatch_count = 0;
2503
2504 /*
2505 * This is the idle processor thread, which just looks for other threads
2506 * to execute.
2507 */
2508 void
2509 idle_thread(void)
2510 {
2511 register processor_t processor;
2512 register thread_t *threadp;
2513 register int *gcount;
2514 register int *lcount;
2515 register thread_t new_thread;
2516 register int state;
2517 register processor_set_t pset;
2518 ast_t *myast = ast_pending();
2519
2520 processor = current_processor();
2521
2522 threadp = &processor->next_thread;
2523 lcount = &processor->runq.count;
2524 gcount = &processor->processor_set->runq.count;
2525
2526 (void)splsched();
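/*
 * Wait for work: either a thread handed off via next_thread,
 * or anything appearing on the local or global run queues.
 */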
2527 while ( (*threadp == THREAD_NULL) &&
2528 (*gcount == 0) && (*lcount == 0) ) {
2529
2530 /* check for ASTs while we wait */
2531 if (*myast &~ (AST_SCHEDULING | AST_BSD)) {
2532 /* nothing for the idle thread to handle -- discard and let interrupts in */
2533 *myast &= AST_NONE;
2534 (void)spllo();
2535 }
2536 else
2537 machine_idle();
2538
2539 (void)splsched();
2540 }
2541
2542 /*
2543 * The state checks below use an if/else chain rather than a
2544 * switch statement, to avoid bounds-checking code in the common case.
2545 */
2546 pset = processor->processor_set;
2547 simple_lock(&pset->sched_lock);
2548
2549 state = processor->state;
2550 if (state == PROCESSOR_DISPATCHING) {
2551 /*
2552 * Common case -- cpu dispatched.
2553 */
2554 new_thread = *threadp;
2555 *threadp = (volatile thread_t) THREAD_NULL;
2556 processor->state = PROCESSOR_RUNNING;
2557 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2558
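/*
 * If both the dispatched thread and the head of the pset run
 * queue are realtime, run the one with the earlier deadline and
 * requeue the other at the head of its run queue.
 */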
2559 if ( pset->runq.highq >= BASEPRI_RTQUEUES &&
2560 new_thread->sched_pri >= BASEPRI_RTQUEUES ) {
2561 register run_queue_t runq = &pset->runq;
2562 register queue_t q;
2563
2564 q = runq->queues + runq->highq;
2565 if (((thread_t)q->next)->realtime.deadline <
2566 processor->deadline) {
2567 thread_t thread = new_thread;
2568
2569 new_thread = (thread_t)q->next;
2570 ((queue_entry_t)new_thread)->next->prev = q;
2571 q->next = ((queue_entry_t)new_thread)->next;
2572 new_thread->runq = RUN_QUEUE_NULL;
2573 processor->deadline = new_thread->realtime.deadline;
2574 assert(new_thread->sched_mode & TH_MODE_PREEMPT);
2575 runq->count--; runq->urgency--;
2576 if (queue_empty(q)) {
2577 if (runq->highq != IDLEPRI)
2578 clrbit(MAXPRI - runq->highq, runq->bitmap);
2579 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2580 }
2581 dispatch_counts.missed_realtime++;
2582 simple_unlock(&pset->sched_lock);
2583
2584 thread_lock(thread);
2585 thread_setrun(thread, SCHED_HEADQ);
2586 thread_unlock(thread);
2587
2588 counter(c_idle_thread_handoff++);
2589 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2590 /*NOTREACHED*/
2591 }
2592 simple_unlock(&pset->sched_lock);
2593
2594 counter(c_idle_thread_handoff++);
2595 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2596 /*NOTREACHED*/
2597 }
2598
2599 if ( processor->runq.highq > new_thread->sched_pri ||
2600 pset->runq.highq > new_thread->sched_pri ) {
2601 thread_t thread = new_thread;
2602
2603 new_thread = choose_thread(pset, processor);
2604 dispatch_counts.missed_other++;
2605 simple_unlock(&pset->sched_lock);
2606
2607 thread_lock(thread);
2608 thread_setrun(thread, SCHED_HEADQ);
2609 thread_unlock(thread);
2610
2611 counter(c_idle_thread_handoff++);
2612 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2613 /* NOTREACHED */
2614 }
2615 else {
2616 simple_unlock(&pset->sched_lock);
2617
2618 counter(c_idle_thread_handoff++);
2619 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2620 /* NOTREACHED */
2621 }
2622 }
2623 else
2624 if (state == PROCESSOR_IDLE) {
2625 /*
2626 * Processor was not dispatched (Rare).
2627 * Set it running again and force a
2628 * reschedule.
2629 */
2630 no_dispatch_count++;
2631 pset->idle_count--;
2632 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2633 processor->state = PROCESSOR_RUNNING;
2634 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2635 simple_unlock(&pset->sched_lock);
2636
2637 counter(c_idle_thread_block++);
2638 thread_block((thread_continue_t)idle_thread);
2639 /* NOTREACHED */
2640 }
2641 else
2642 if (state == PROCESSOR_SHUTDOWN) {
2643 /*
2644 * Going off-line. Force a
2645 * reschedule.
2646 */
2647 if ((new_thread = (thread_t)*threadp) != THREAD_NULL) {
2648 *threadp = (volatile thread_t) THREAD_NULL;
2649 processor->deadline = UINT64_MAX;
2650 simple_unlock(&pset->sched_lock);
2651
2652 thread_lock(new_thread);
2653 thread_setrun(new_thread, SCHED_HEADQ);
2654 thread_unlock(new_thread);
2655 }
2656 else
2657 simple_unlock(&pset->sched_lock);
2658
2659 counter(c_idle_thread_block++);
2660 thread_block((thread_continue_t)idle_thread);
2661 /* NOTREACHED */
2662 }
2663
2664 simple_unlock(&pset->sched_lock);
2665
2666 panic("idle_thread: state %d\n", processor->state);
2667 /*NOTREACHED*/
2668 }
2669
2670 kern_return_t
2671 idle_thread_create(
2672 processor_t processor)
2673 {
2674 kern_return_t result;
2675 thread_t thread;
2676 spl_t s;
2677
2678 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
2679 if (result != KERN_SUCCESS)
2680 return (result);
2681
2682 s = splsched();
2683 thread_lock(thread);
2684 thread->bound_processor = processor;
2685 processor->idle_thread = thread;
2686 thread->sched_pri = thread->priority = IDLEPRI;
2687 thread->state = (TH_RUN | TH_IDLE);
2688 thread_unlock(thread);
2689 splx(s);
2690
2691 thread_deallocate(thread);
2692
2693 return (KERN_SUCCESS);
2694 }
2695
2696 static uint64_t sched_tick_deadline;
2697
2698 /*
2699 * sched_startup:
2700 *
2701 * Kicks off scheduler services.
2702 *
2703 * Called at splsched.
2704 */
2705 void
2706 sched_startup(void)
2707 {
2708 kern_return_t result;
2709 thread_t thread;
2710
2711 result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
2712 if (result != KERN_SUCCESS)
2713 panic("sched_startup");
2714
2715 thread_deallocate(thread);
2716
2717 /*
2718 * Yield to the sched_tick_thread while it times
2719 * a series of context switches back. It stores
2720 * the baseline value in sched_cswtime.
2721 *
2722 * The current thread is the only other thread
2723 * active at this point.
2724 */
2725 while (sched_cswtime == 0)
2726 thread_block(THREAD_CONTINUE_NULL);
2727
2728 thread_daemon_init();
2729
2730 thread_call_initialize();
2731 }
2732
2733 /*
2734 * sched_tick_continue:
2735 *
2736 * Perform periodic bookkeeping functions about ten
2737 * times per second.
2738 */
2739 static void
2740 sched_tick_continue(void)
2741 {
2742 uint64_t abstime = mach_absolute_time();
2743
2744 sched_tick++;
2745
2746 /*
2747 * Compute various averages.
2748 */
2749 compute_averages();
2750
2751 /*
2752 * Scan the run queues for threads which
2753 * may need to be updated.
2754 */
2755 thread_update_scan();
2756
2757 clock_deadline_for_periodic_event(sched_tick_interval, abstime,
2758 &sched_tick_deadline);
2759
2760 assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
2761 thread_block((thread_continue_t)sched_tick_continue);
2762 /*NOTREACHED*/
2763 }
2764
2765 /*
2766 * Time a series of context switches to determine
2767 * a baseline. Toss the high and low and return
2768 * the one-way value.
2769 */
2770 static uint32_t
2771 time_cswitch(void)
2772 {
2773 uint32_t new, hi, low, accum;
2774 uint64_t abstime;
2775 int i, tries = 7;
2776
2777 accum = hi = low = 0;
2778 for (i = 0; i < tries; ++i) {
2779 abstime = mach_absolute_time();
2780 thread_block(THREAD_CONTINUE_NULL);
2781
2782 new = mach_absolute_time() - abstime;
2783
2784 if (i == 0)
2785 accum = hi = low = new;
2786 else {
2787 if (new < low)
2788 low = new;
2789 else
2790 if (new > hi)
2791 hi = new;
2792 accum += new;
2793 }
2794 }
2795
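/*
 * Average the remaining samples; each sample spans a block
 * and a resume (two context switches), so halve it to get
 * the one-way cost.
 */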
2796 return ((accum - hi - low) / (2 * (tries - 2)));
2797 }
2798
2799 void
2800 sched_tick_thread(void)
2801 {
2802 sched_cswtime = time_cswitch();
2803
2804 sched_tick_deadline = mach_absolute_time();
2805
2806 sched_tick_continue();
2807 /*NOTREACHED*/
2808 }
2809
2810 /*
2811 * thread_update_scan / runq_scan:
2812 *
2813 * Scan the run queues to account for timesharing threads
2814 * which need to be updated.
2815 *
2816 * The scanner runs in two passes. Pass one collects likely
2817 * candidates into an array; pass two performs the update.
2818 *
2819 * This is necessary because the run queue is locked for
2820 * the candidate scan, but the thread is locked for the update.
2821 *
2822 * Array should be sized to make forward progress, without
2823 * disabling preemption for long periods.
2824 */
2825
2826 #define THREAD_UPDATE_SIZE 128
2827
2828 static thread_t thread_update_array[THREAD_UPDATE_SIZE];
2829 static int thread_update_count = 0;
2830
2831 /*
2832 * Scan a runq for candidate threads.
2833 *
2834 * Returns TRUE if retry is needed.
2835 */
2836 static boolean_t
2837 runq_scan(
2838 run_queue_t runq)
2839 {
2840 register int count;
2841 register queue_t q;
2842 register thread_t thread;
2843
2844 if ((count = runq->count) > 0) {
2845 q = runq->queues + runq->highq;
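/*
 * Walk the priority levels from highq downward until
 * every runnable thread has been examined.
 */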
2846 while (count > 0) {
2847 queue_iterate(q, thread, thread_t, links) {
2848 if ( thread->sched_stamp != sched_tick &&
2849 (thread->sched_mode & TH_MODE_TIMESHARE) ) {
2850 if (thread_update_count == THREAD_UPDATE_SIZE)
2851 return (TRUE);
2852
2853 thread_update_array[thread_update_count++] = thread;
2854 thread_reference_internal(thread);
2855 }
2856
2857 count--;
2858 }
2859
2860 q--;
2861 }
2862 }
2863
2864 return (FALSE);
2865 }
2866
2867 static void
2868 thread_update_scan(void)
2869 {
2870 register boolean_t restart_needed;
2871 register processor_set_t pset = &default_pset;
2872 register processor_t processor;
2873 register thread_t thread;
2874 spl_t s;
2875
2876 do {
2877 s = splsched();
2878 simple_lock(&pset->sched_lock);
2879 restart_needed = runq_scan(&pset->runq);
2880 simple_unlock(&pset->sched_lock);
2881
2882 if (!restart_needed) {
2883 simple_lock(&pset->sched_lock);
2884 processor = (processor_t)queue_first(&pset->processors);
2885 while (!queue_end(&pset->processors, (queue_entry_t)processor)) {
2886 if ((restart_needed = runq_scan(&processor->runq)) != 0)
2887 break;
2888
2889 thread = processor->idle_thread;
2890 if (thread->sched_stamp != sched_tick) {
2891 if (thread_update_count == THREAD_UPDATE_SIZE) {
2892 restart_needed = TRUE;
2893 break;
2894 }
2895
2896 thread_update_array[thread_update_count++] = thread;
2897 thread_reference_internal(thread);
2898 }
2899
2900 processor = (processor_t)queue_next(&processor->processors);
2901 }
2902 simple_unlock(&pset->sched_lock);
2903 }
2904 splx(s);
2905
2906 /*
2907 * Ok, we now have a collection of candidates -- fix them.
2908 */
2909 while (thread_update_count > 0) {
2910 thread = thread_update_array[--thread_update_count];
2911 thread_update_array[thread_update_count] = THREAD_NULL;
2912
2913 s = splsched();
2914 thread_lock(thread);
2915 if ( !(thread->state & (TH_WAIT|TH_SUSP)) &&
2916 thread->sched_stamp != sched_tick )
2917 update_priority(thread);
2918 thread_unlock(thread);
2919 splx(s);
2920
2921 thread_deallocate(thread);
2922 }
2923 } while (restart_needed);
2924 }
2925
2926 /*
2927 * Just in case someone doesn't use the macro
2928 */
2929 #undef thread_wakeup
2930 void
2931 thread_wakeup(
2932 event_t x);
2933
2934 void
2935 thread_wakeup(
2936 event_t x)
2937 {
2938 thread_wakeup_with_result(x, THREAD_AWAKENED);
2939 }
2940
2941 boolean_t
2942 preemption_enabled(void)
2943 {
2944 return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
2945 }
2946
2947 #if DEBUG
2948 static boolean_t
2949 thread_runnable(
2950 thread_t thread)
2951 {
2952 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
2953 }
2954 #endif /* DEBUG */
2955
2956 #if MACH_KDB
2957 #include <ddb/db_output.h>
2958 #define printf kdbprintf
2959 void db_sched(void);
2960
2961 void
2962 db_sched(void)
2963 {
2964 iprintf("Scheduling Statistics:\n");
2965 db_indent += 2;
2966 iprintf("Thread invocations: csw %d same %d\n",
2967 c_thread_invoke_csw, c_thread_invoke_same);
2968 #if MACH_COUNTERS
2969 iprintf("Thread block: calls %d\n",
2970 c_thread_block_calls);
2971 iprintf("Idle thread:\n\thandoff %d block %d no_dispatch %d\n",
2972 c_idle_thread_handoff,
2973 c_idle_thread_block, no_dispatch_count);
2974 iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
2975 #endif /* MACH_COUNTERS */
2976 db_indent -= 2;
2977 }
2978
2979 #include <ddb/db_output.h>
2980 void db_show_thread_log(void);
2981
2982 void
2983 db_show_thread_log(void)
2984 {
2985 }
2986 #endif /* MACH_KDB */