1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_FREE_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: sched_prim.c
54 * Author: Avadis Tevanian, Jr.
55 * Date: 1986
56 *
57 * Scheduling primitives
58 *
59 */
60
61 #include <debug.h>
62 #include <mach_kdb.h>
63
64 #include <ddb/db_output.h>
65
66 #include <mach/mach_types.h>
67 #include <mach/machine.h>
68 #include <mach/policy.h>
69 #include <mach/sync_policy.h>
70
71 #include <machine/machine_routines.h>
72 #include <machine/sched_param.h>
73
74 #include <kern/kern_types.h>
75 #include <kern/clock.h>
76 #include <kern/counters.h>
77 #include <kern/cpu_number.h>
78 #include <kern/cpu_data.h>
79 #include <kern/debug.h>
80 #include <kern/lock.h>
81 #include <kern/macro_help.h>
82 #include <kern/machine.h>
83 #include <kern/misc_protos.h>
84 #include <kern/processor.h>
85 #include <kern/queue.h>
86 #include <kern/sched.h>
87 #include <kern/sched_prim.h>
88 #include <kern/syscall_subr.h>
89 #include <kern/task.h>
90 #include <kern/thread.h>
91 #include <kern/wait_queue.h>
92
93 #include <vm/pmap.h>
94 #include <vm/vm_kern.h>
95 #include <vm/vm_map.h>
96
97 #include <sys/kdebug.h>
98
99 #ifdef __ppc__
100 #include <ppc/pms.h>
101 #endif
102
103 #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
104 int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
105
106 #define MAX_UNSAFE_QUANTA 800
107 int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
108
109 #define MAX_POLL_QUANTA 2
110 int max_poll_quanta = MAX_POLL_QUANTA;
111
112 #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
113 int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
114
115 uint64_t max_unsafe_computation;
116 uint32_t sched_safe_duration;
117 uint64_t max_poll_computation;
118
119 uint32_t std_quantum;
120 uint32_t min_std_quantum;
121
122 uint32_t std_quantum_us;
123
124 uint32_t max_rt_quantum;
125 uint32_t min_rt_quantum;
126
127 uint32_t sched_cswtime;
128
129 static uint32_t delay_idle_limit, delay_idle_spin;
130 static processor_t delay_idle(
131 processor_t processor,
132 thread_t self);
133
134 unsigned sched_tick;
135 uint32_t sched_tick_interval;
136
137 uint32_t sched_pri_shift;
138
139 /* Forwards */
140 void wait_queues_init(void);
141
142 static void load_shift_init(void);
143
144 static thread_t choose_thread(
145 processor_set_t pset,
146 processor_t processor);
147
148 static void thread_update_scan(void);
149
150 #if DEBUG
151 static
152 boolean_t thread_runnable(
153 thread_t thread);
154
155 #endif /*DEBUG*/
156
157
158 /*
159 * State machine
160 *
161 * states are combinations of:
162 * R running
163 * W waiting (or on wait queue)
164 * N non-interruptible
165 * O swapped out
166 * I being swapped in
167 *
168 *   init    action
169 *           assert_wait      thread_block     clear_wait      swapout   swapin
170 *
171 *   R       RW, RWN          R; setrun        -               -
172 *   RN      RWN              RN; setrun       -               -
173 *
174 *   RW                       W                R               -
175 *   RWN                      WN               RN              -
176 *
177 *   W                                         R; setrun       WO
178 *   WN                                        RN; setrun      -
179 *
180 *   RO                                        -               -         R
181 *
182 */
183
184 /*
185 * Waiting protocols and implementation:
186 *
187 * Each thread may be waiting for exactly one event; this event
188 * is set using assert_wait(). That thread may be awakened either
189 * by performing a thread_wakeup_prim() on its event,
190 * or by directly waking that thread up with clear_wait().
191 *
192 * The implementation of wait events uses a hash table. Each
193 * bucket is a queue of threads having the same hash function
194 * value; the chain for the queue (linked list) is the run queue
195 * field. [It is not possible to be waiting and runnable at the
196 * same time.]
197 *
198 * Locks on both the thread and on the hash buckets govern the
199 * wait event field and the queue chain field. Because wakeup
200 * operations only have the event as an argument, the event hash
201 * bucket must be locked before any thread.
202 *
203 * Scheduling operations may also occur at interrupt level; therefore,
204 * interrupts below splsched() must be prevented when holding
205 * thread or hash bucket locks.
206 *
207 * The wait event hash table declarations are as follows:
208 */
209
210 #define NUMQUEUES 59
211
212 struct wait_queue wait_queues[NUMQUEUES];
213
214 #define wait_hash(event) \
215 ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
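/*
 * Illustrative sketch only: how an event is mapped to its bucket in
 * wait_queues[] by wait_hash().  The helper name below is hypothetical
 * and exists purely for exposition.
 */
#if 0
static wait_queue_t
example_wait_queue_for_event(event_t event)
{
	int index = wait_hash(event);	/* always in the range 0 .. NUMQUEUES-1 */

	return (&wait_queues[index]);
}
#endif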
216
217 int8_t sched_load_shifts[NRQS];
218
219 void
220 sched_init(void)
221 {
222 /*
223 * Calculate the timeslicing quantum
224 * in us.
225 */
226 if (default_preemption_rate < 1)
227 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
228 std_quantum_us = (1000 * 1000) / default_preemption_rate;
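/* e.g. the default rate of 100 preemptions/sec yields std_quantum_us = 10000 (10 ms) */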
229
230 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
231
232 sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
233 (1 << SCHED_TICK_SHIFT);
234
235 wait_queues_init();
236 load_shift_init();
237 pset_init(&default_pset);
238 sched_tick = 0;
239 ast_init();
240 }
241
242 void
243 sched_timebase_init(void)
244 {
245 uint64_t abstime;
246 uint32_t shift;
247
248 /* standard timeslicing quantum */
249 clock_interval_to_absolutetime_interval(
250 std_quantum_us, NSEC_PER_USEC, &abstime);
251 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
252 std_quantum = abstime;
253
254 /* smallest remaining quantum (250 us) */
255 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
256 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
257 min_std_quantum = abstime;
258
259 /* smallest rt computation (50 us) */
260 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
261 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
262 min_rt_quantum = abstime;
263
264 /* maximum rt computation (50 ms) */
265 clock_interval_to_absolutetime_interval(
266 50, 1000*NSEC_PER_USEC, &abstime);
267 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
268 max_rt_quantum = abstime;
269
270 /* scheduler tick interval */
271 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
272 NSEC_PER_USEC, &abstime);
273 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
274 sched_tick_interval = abstime;
275
276 /*
277 * Compute conversion factor from usage to
278 * timesharing priorities with 5/8 ** n aging.
279 */
280 abstime = (abstime * 5) / 3;
281 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
282 abstime >>= 1;
283 sched_pri_shift = shift;
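/*
 * The loop above picked the smallest shift for which
 * (sched_tick_interval * 5 / 3) >> shift is no greater than BASEPRI_DEFAULT;
 * sched_pri_shift is subsequently used to scale accumulated usage down
 * into priority units.
 */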
284
285 max_unsafe_computation = max_unsafe_quanta * std_quantum;
286 max_poll_computation = max_poll_quanta * std_quantum;
287
288 /* delay idle constants: limit (60 us) and spin interval (1 us) */
289 clock_interval_to_absolutetime_interval(60, NSEC_PER_USEC, &abstime);
290 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
291 delay_idle_limit = abstime;
292
293 clock_interval_to_absolutetime_interval(1, NSEC_PER_USEC, &abstime);
294 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
295 delay_idle_spin = abstime;
296 }
297
298 void
299 wait_queues_init(void)
300 {
301 register int i;
302
303 for (i = 0; i < NUMQUEUES; i++) {
304 wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
305 }
306 }
307
308 /*
309 * Set up values for timeshare
310 * loading factors.
311 */
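/*
 * In effect the table built below becomes sched_load_shifts[n] = floor(log2(n))
 * for n >= 2, with sched_load_shifts[0] = INT8_MIN and sched_load_shifts[1] = 0
 * (entries 2-3 hold 1, 4-7 hold 2, 8-15 hold 3, and so on up to NRQS - 1).
 */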
312 static void
313 load_shift_init(void)
314 {
315 int8_t k, *p = sched_load_shifts;
316 uint32_t i, j;
317
318 *p++ = INT8_MIN; *p++ = 0;
319
320 for (i = j = 2, k = 1; i < NRQS; ++k) {
321 for (j <<= 1; i < j; ++i)
322 *p++ = k;
323 }
324 }
325
326 /*
327 * Thread wait timer expiration.
328 */
329 void
330 thread_timer_expire(
331 void *p0,
332 __unused void *p1)
333 {
334 thread_t thread = p0;
335 spl_t s;
336
337 s = splsched();
338 thread_lock(thread);
339 if (--thread->wait_timer_active == 0) {
340 if (thread->wait_timer_is_set) {
341 thread->wait_timer_is_set = FALSE;
342 clear_wait_internal(thread, THREAD_TIMED_OUT);
343 }
344 }
345 thread_unlock(thread);
346 splx(s);
347 }
348
349 /*
350 * thread_set_timer:
351 *
352 * Set a timer for the current thread, if the thread
353 * is ready to wait. Must be called between assert_wait()
354 * and thread_block().
355 */
356 void
357 thread_set_timer(
358 uint32_t interval,
359 uint32_t scale_factor)
360 {
361 thread_t thread = current_thread();
362 uint64_t deadline;
363 spl_t s;
364
365 s = splsched();
366 thread_lock(thread);
367 if ((thread->state & TH_WAIT) != 0) {
368 clock_interval_to_deadline(interval, scale_factor, &deadline);
369 if (!timer_call_enter(&thread->wait_timer, deadline))
370 thread->wait_timer_active++;
371 thread->wait_timer_is_set = TRUE;
372 }
373 thread_unlock(thread);
374 splx(s);
375 }
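/*
 * Illustrative usage sketch for the timed-wait path: the event and the
 * 100 ms interval are hypothetical, shown only to make the required
 * assert_wait() / thread_set_timer() / thread_block() ordering concrete.
 */
#if 0
static wait_result_t
example_timed_wait(event_t event)
{
	wait_result_t wresult;

	wresult = assert_wait(event, THREAD_INTERRUPTIBLE);
	if (wresult == THREAD_WAITING) {
		thread_set_timer(100, 1000*NSEC_PER_USEC);	/* 100 ms timeout */
		wresult = thread_block(THREAD_CONTINUE_NULL);	/* THREAD_TIMED_OUT on expiry */
	}

	return (wresult);
}
#endif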
376
377 void
378 thread_set_timer_deadline(
379 uint64_t deadline)
380 {
381 thread_t thread = current_thread();
382 spl_t s;
383
384 s = splsched();
385 thread_lock(thread);
386 if ((thread->state & TH_WAIT) != 0) {
387 if (!timer_call_enter(&thread->wait_timer, deadline))
388 thread->wait_timer_active++;
389 thread->wait_timer_is_set = TRUE;
390 }
391 thread_unlock(thread);
392 splx(s);
393 }
394
395 void
396 thread_cancel_timer(void)
397 {
398 thread_t thread = current_thread();
399 spl_t s;
400
401 s = splsched();
402 thread_lock(thread);
403 if (thread->wait_timer_is_set) {
404 if (timer_call_cancel(&thread->wait_timer))
405 thread->wait_timer_active--;
406 thread->wait_timer_is_set = FALSE;
407 }
408 thread_unlock(thread);
409 splx(s);
410 }
411
412 /*
413 * thread_unblock:
414 *
415 * Unblock thread on wake up.
416 *
417 * Returns TRUE if the thread is still running.
418 *
419 * Thread must be locked.
420 */
421 boolean_t
422 thread_unblock(
423 thread_t thread,
424 wait_result_t wresult)
425 {
426 boolean_t result = FALSE;
427
428 /*
429 * Set wait_result.
430 */
431 thread->wait_result = wresult;
432
433 /*
434 * Cancel pending wait timer.
435 */
436 if (thread->wait_timer_is_set) {
437 if (timer_call_cancel(&thread->wait_timer))
438 thread->wait_timer_active--;
439 thread->wait_timer_is_set = FALSE;
440 }
441
442 /*
443 * Update scheduling state.
444 */
445 thread->state &= ~(TH_WAIT|TH_UNINT);
446
447 if (!(thread->state & TH_RUN)) {
448 thread->state |= TH_RUN;
449
450 /*
451 * Notify the callout mechanism if this is a callout thread.
452 */
453 if (thread->options & TH_OPT_CALLOUT)
454 call_thread_unblock();
455
456 /*
457 * Update pset run counts.
458 */
459 pset_run_incr(thread->processor_set);
460 if (thread->sched_mode & TH_MODE_TIMESHARE)
461 pset_share_incr(thread->processor_set);
462 }
463 else
464 result = TRUE;
465
466 /*
467 * Calculate deadline for real-time threads.
468 */
469 if (thread->sched_mode & TH_MODE_REALTIME) {
470 thread->realtime.deadline = mach_absolute_time();
471 thread->realtime.deadline += thread->realtime.constraint;
472 }
473
474 /*
475 * Clear old quantum, fail-safe computation, etc.
476 */
477 thread->current_quantum = 0;
478 thread->computation_metered = 0;
479 thread->reason = AST_NONE;
480
481 KERNEL_DEBUG_CONSTANT(
482 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
483 (int)thread, (int)thread->sched_pri, 0, 0, 0);
484
485 return (result);
486 }
487
488 /*
489 * Routine: thread_go
490 * Purpose:
491 * Unblock and dispatch thread.
492 * Conditions:
493 * thread lock held, IPC locks may be held.
494 * thread must have been pulled from wait queue under same lock hold.
495 * Returns:
496 * KERN_SUCCESS - Thread was set running
497 * KERN_NOT_WAITING - Thread was not waiting
498 */
499 kern_return_t
500 thread_go(
501 thread_t thread,
502 wait_result_t wresult)
503 {
504 assert(thread->at_safe_point == FALSE);
505 assert(thread->wait_event == NO_EVENT64);
506 assert(thread->wait_queue == WAIT_QUEUE_NULL);
507
508 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
509 if (!thread_unblock(thread, wresult))
510 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
511
512 return (KERN_SUCCESS);
513 }
514
515 return (KERN_NOT_WAITING);
516 }
517
518 /*
519 * Routine: thread_mark_wait_locked
520 * Purpose:
521 * Mark a thread as waiting. If, given the circumstances,
522 * it doesn't want to wait (i.e. already aborted), then
523 * indicate that in the return value.
524 * Conditions:
525 * at splsched() and thread is locked.
526 */
527 __private_extern__
528 wait_result_t
529 thread_mark_wait_locked(
530 thread_t thread,
531 wait_interrupt_t interruptible)
532 {
533 boolean_t at_safe_point;
534
535 /*
536 * The thread may have certain types of interrupts/aborts masked
537 * off. Even if the wait location says these types of interrupts
538 * are OK, we have to honor mask settings (outer-scoped code may
539 * not be able to handle aborts at the moment).
540 */
541 if (interruptible > (thread->options & TH_OPT_INTMASK))
542 interruptible = thread->options & TH_OPT_INTMASK;
543
544 at_safe_point = (interruptible == THREAD_ABORTSAFE);
545
546 if ( interruptible == THREAD_UNINT ||
547 !(thread->state & TH_ABORT) ||
548 (!at_safe_point &&
549 (thread->state & TH_ABORT_SAFELY)) ) {
550 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
551 thread->at_safe_point = at_safe_point;
552 return (thread->wait_result = THREAD_WAITING);
553 }
554 else
555 if (thread->state & TH_ABORT_SAFELY)
556 thread->state &= ~(TH_ABORT|TH_ABORT_SAFELY);
557
558 return (thread->wait_result = THREAD_INTERRUPTED);
559 }
560
561 /*
562 * Routine: thread_interrupt_level
563 * Purpose:
564 * Set the maximum interruptible state for the
565 * current thread. The effective value of any
566 * interruptible flag passed into assert_wait
567 * will never exceed this.
568 *
569 * Useful for code that must not be interrupted,
570 * but which calls code that doesn't know that.
571 * Returns:
572 * The old interrupt level for the thread.
573 */
574 __private_extern__
575 wait_interrupt_t
576 thread_interrupt_level(
577 wait_interrupt_t new_level)
578 {
579 thread_t thread = current_thread();
580 wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
581
582 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
583
584 return result;
585 }
586
587 /*
588 * Check to see if an assert wait is possible, without actually doing one.
589 * This is used by debug code in locks and elsewhere to verify that it is
590 * always OK to block when trying to take a blocking lock (since waiting
591 * for the actual assert_wait to catch the case may make it hard to detect
592 * this case).
593 */
594 boolean_t
595 assert_wait_possible(void)
596 {
597
598 thread_t thread;
599
600 #if DEBUG
601 if(debug_mode) return TRUE; /* Always succeed in debug mode */
602 #endif
603
604 thread = current_thread();
605
606 return (thread == NULL || wait_queue_assert_possible(thread));
607 }
608
609 /*
610 * assert_wait:
611 *
612 * Assert that the current thread is about to go to
613 * sleep until the specified event occurs.
614 */
615 wait_result_t
616 assert_wait(
617 event_t event,
618 wait_interrupt_t interruptible)
619 {
620 register wait_queue_t wq;
621 register int index;
622
623 assert(event != NO_EVENT);
624
625 index = wait_hash(event);
626 wq = &wait_queues[index];
627 return wait_queue_assert_wait(wq, event, interruptible, 0);
628 }
629
630 wait_result_t
631 assert_wait_timeout(
632 event_t event,
633 wait_interrupt_t interruptible,
634 uint32_t interval,
635 uint32_t scale_factor)
636 {
637 thread_t thread = current_thread();
638 wait_result_t wresult;
639 wait_queue_t wqueue;
640 uint64_t deadline;
641 spl_t s;
642
643 assert(event != NO_EVENT);
644 wqueue = &wait_queues[wait_hash(event)];
645
646 s = splsched();
647 wait_queue_lock(wqueue);
648 thread_lock(thread);
649
650 clock_interval_to_deadline(interval, scale_factor, &deadline);
651 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
652 interruptible, deadline, thread);
653
654 thread_unlock(thread);
655 wait_queue_unlock(wqueue);
656 splx(s);
657
658 return (wresult);
659 }
660
661 wait_result_t
662 assert_wait_deadline(
663 event_t event,
664 wait_interrupt_t interruptible,
665 uint64_t deadline)
666 {
667 thread_t thread = current_thread();
668 wait_result_t wresult;
669 wait_queue_t wqueue;
670 spl_t s;
671
672 assert(event != NO_EVENT);
673 wqueue = &wait_queues[wait_hash(event)];
674
675 s = splsched();
676 wait_queue_lock(wqueue);
677 thread_lock(thread);
678
679 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
680 interruptible, deadline, thread);
681
682 thread_unlock(thread);
683 wait_queue_unlock(wqueue);
684 splx(s);
685
686 return (wresult);
687 }
688
689 /*
690 * thread_sleep_fast_usimple_lock:
691 *
692 * Cause the current thread to wait until the specified event
693 * occurs. The specified simple_lock is unlocked before releasing
694 * the cpu and re-acquired as part of waking up.
695 *
696 * This is the simple lock sleep interface for components that use a
697 * faster version of simple_lock() than is provided by usimple_lock().
698 */
699 __private_extern__ wait_result_t
700 thread_sleep_fast_usimple_lock(
701 event_t event,
702 simple_lock_t lock,
703 wait_interrupt_t interruptible)
704 {
705 wait_result_t res;
706
707 res = assert_wait(event, interruptible);
708 if (res == THREAD_WAITING) {
709 simple_unlock(lock);
710 res = thread_block(THREAD_CONTINUE_NULL);
711 simple_lock(lock);
712 }
713 return res;
714 }
715
716
717 /*
718 * thread_sleep_usimple_lock:
719 *
720 * Cause the current thread to wait until the specified event
721 * occurs. The specified usimple_lock is unlocked before releasing
722 * the cpu and re-acquired as part of waking up.
723 *
724 * This is the simple lock sleep interface for components where
725 * simple_lock() is defined in terms of usimple_lock().
726 */
727 wait_result_t
728 thread_sleep_usimple_lock(
729 event_t event,
730 usimple_lock_t lock,
731 wait_interrupt_t interruptible)
732 {
733 wait_result_t res;
734
735 res = assert_wait(event, interruptible);
736 if (res == THREAD_WAITING) {
737 usimple_unlock(lock);
738 res = thread_block(THREAD_CONTINUE_NULL);
739 usimple_lock(lock);
740 }
741 return res;
742 }
743
744 /*
745 * thread_sleep_mutex:
746 *
747 * Cause the current thread to wait until the specified event
748 * occurs. The specified mutex is unlocked before releasing
749 * the cpu. The mutex will be re-acquired before returning.
750 *
751 * JMM - Add hint to make sure mutex is available before rousting
752 */
753 wait_result_t
754 thread_sleep_mutex(
755 event_t event,
756 mutex_t *mutex,
757 wait_interrupt_t interruptible)
758 {
759 wait_result_t res;
760
761 res = assert_wait(event, interruptible);
762 if (res == THREAD_WAITING) {
763 mutex_unlock(mutex);
764 res = thread_block(THREAD_CONTINUE_NULL);
765 mutex_lock(mutex);
766 }
767 return res;
768 }
769
770 /*
771 * thread_sleep_mutex_deadline:
772 *
773 * Cause the current thread to wait until the specified event
774 * (or deadline) occurs. The specified mutex is unlocked before
775 * releasing the cpu. The mutex will be re-acquired before returning.
776 */
777 wait_result_t
778 thread_sleep_mutex_deadline(
779 event_t event,
780 mutex_t *mutex,
781 uint64_t deadline,
782 wait_interrupt_t interruptible)
783 {
784 wait_result_t res;
785
786 res = assert_wait_deadline(event, interruptible, deadline);
787 if (res == THREAD_WAITING) {
788 mutex_unlock(mutex);
789 res = thread_block(THREAD_CONTINUE_NULL);
790 mutex_lock(mutex);
791 }
792 return res;
793 }
794
795 /*
796 * thread_sleep_lock_write:
797 *
798 * Cause the current thread to wait until the specified event
799 * occurs. The specified (write) lock is unlocked before releasing
800 * the cpu. The (write) lock will be re-acquired before returning.
801 */
802 wait_result_t
803 thread_sleep_lock_write(
804 event_t event,
805 lock_t *lock,
806 wait_interrupt_t interruptible)
807 {
808 wait_result_t res;
809
810 res = assert_wait(event, interruptible);
811 if (res == THREAD_WAITING) {
812 lock_write_done(lock);
813 res = thread_block(THREAD_CONTINUE_NULL);
814 lock_write(lock);
815 }
816 return res;
817 }
818
819 /*
820 * thread_stop:
821 *
822 * Force a preemption point for a thread and wait
823 * for it to stop running. Arbitrates access among
824 * multiple stop requests. (released by unstop)
825 *
826 * The thread must enter a wait state and stop via a
827 * separate means.
828 *
829 * Returns FALSE if interrupted.
830 */
831 boolean_t
832 thread_stop(
833 thread_t thread)
834 {
835 wait_result_t wresult;
836 spl_t s;
837
838 s = splsched();
839 wake_lock(thread);
840
841 while (thread->state & TH_SUSP) {
842 thread->wake_active = TRUE;
843 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
844 wake_unlock(thread);
845 splx(s);
846
847 if (wresult == THREAD_WAITING)
848 wresult = thread_block(THREAD_CONTINUE_NULL);
849
850 if (wresult != THREAD_AWAKENED)
851 return (FALSE);
852
853 s = splsched();
854 wake_lock(thread);
855 }
856
857 thread_lock(thread);
858 thread->state |= TH_SUSP;
859
860 while (thread->state & TH_RUN) {
861 processor_t processor = thread->last_processor;
862
863 if ( processor != PROCESSOR_NULL &&
864 processor->state == PROCESSOR_RUNNING &&
865 processor->active_thread == thread )
866 cause_ast_check(processor);
867 thread_unlock(thread);
868
869 thread->wake_active = TRUE;
870 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
871 wake_unlock(thread);
872 splx(s);
873
874 if (wresult == THREAD_WAITING)
875 wresult = thread_block(THREAD_CONTINUE_NULL);
876
877 if (wresult != THREAD_AWAKENED) {
878 thread_unstop(thread);
879 return (FALSE);
880 }
881
882 s = splsched();
883 wake_lock(thread);
884 thread_lock(thread);
885 }
886
887 thread_unlock(thread);
888 wake_unlock(thread);
889 splx(s);
890
891 return (TRUE);
892 }
893
894 /*
895 * thread_unstop:
896 *
897 * Release a previous stop request and set
898 * the thread running if appropriate.
899 *
900 * Use only after a successful stop operation.
901 */
902 void
903 thread_unstop(
904 thread_t thread)
905 {
906 spl_t s = splsched();
907
908 wake_lock(thread);
909 thread_lock(thread);
910
911 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
912 thread->state &= ~TH_SUSP;
913 thread_unblock(thread, THREAD_AWAKENED);
914
915 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
916 }
917 else
918 if (thread->state & TH_SUSP) {
919 thread->state &= ~TH_SUSP;
920
921 if (thread->wake_active) {
922 thread->wake_active = FALSE;
923 thread_unlock(thread);
924 wake_unlock(thread);
925 splx(s);
926
927 thread_wakeup(&thread->wake_active);
928 return;
929 }
930 }
931
932 thread_unlock(thread);
933 wake_unlock(thread);
934 splx(s);
935 }
936
937 /*
938 * thread_wait:
939 *
940 * Wait for a thread to stop running. (non-interruptible)
941 *
942 */
943 void
944 thread_wait(
945 thread_t thread)
946 {
947 wait_result_t wresult;
948 spl_t s = splsched();
949
950 wake_lock(thread);
951 thread_lock(thread);
952
953 while (thread->state & TH_RUN) {
954 processor_t processor = thread->last_processor;
955
956 if ( processor != PROCESSOR_NULL &&
957 processor->state == PROCESSOR_RUNNING &&
958 processor->active_thread == thread )
959 cause_ast_check(processor);
960 thread_unlock(thread);
961
962 thread->wake_active = TRUE;
963 wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
964 wake_unlock(thread);
965 splx(s);
966
967 if (wresult == THREAD_WAITING)
968 thread_block(THREAD_CONTINUE_NULL);
969
970 s = splsched();
971 wake_lock(thread);
972 thread_lock(thread);
973 }
974
975 thread_unlock(thread);
976 wake_unlock(thread);
977 splx(s);
978 }
979
980 /*
981 * Routine: clear_wait_internal
982 *
983 * Clear the wait condition for the specified thread.
984 * Start the thread executing if that is appropriate.
985 * Arguments:
986 * thread thread to awaken
987 * result Wakeup result the thread should see
988 * Conditions:
989 * At splsched
990 * the thread is locked.
991 * Returns:
992 * KERN_SUCCESS thread was rousted out of a wait
993 * KERN_FAILURE thread was waiting but could not be rousted
994 * KERN_NOT_WAITING thread was not waiting
995 */
996 __private_extern__ kern_return_t
997 clear_wait_internal(
998 thread_t thread,
999 wait_result_t wresult)
1000 {
1001 wait_queue_t wq = thread->wait_queue;
1002 int i = LockTimeOut;
1003
1004 do {
1005 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1006 return (KERN_FAILURE);
1007
1008 if (wq != WAIT_QUEUE_NULL) {
1009 if (wait_queue_lock_try(wq)) {
1010 wait_queue_pull_thread_locked(wq, thread, TRUE);
1011 /* wait queue unlocked, thread still locked */
1012 }
1013 else {
1014 thread_unlock(thread);
1015 delay(1);
1016
1017 thread_lock(thread);
1018 if (wq != thread->wait_queue)
1019 return (KERN_NOT_WAITING);
1020
1021 continue;
1022 }
1023 }
1024
1025 return (thread_go(thread, wresult));
1026 } while (--i > 0);
1027
1028 panic("clear_wait_internal: deadlock: thread=0x%x, wq=0x%x, cpu=%d\n",
1029 thread, wq, cpu_number());
1030
1031 return (KERN_FAILURE);
1032 }
1033
1034
1035 /*
1036 * clear_wait:
1037 *
1038 * Clear the wait condition for the specified thread. Start the thread
1039 * executing if that is appropriate.
1040 *
1041 * parameters:
1042 * thread thread to awaken
1043 * result Wakeup result the thread should see
1044 */
1045 kern_return_t
1046 clear_wait(
1047 thread_t thread,
1048 wait_result_t result)
1049 {
1050 kern_return_t ret;
1051 spl_t s;
1052
1053 s = splsched();
1054 thread_lock(thread);
1055 ret = clear_wait_internal(thread, result);
1056 thread_unlock(thread);
1057 splx(s);
1058 return ret;
1059 }
1060
1061
1062 /*
1063 * thread_wakeup_prim:
1064 *
1065 * Common routine for thread_wakeup, thread_wakeup_with_result,
1066 * and thread_wakeup_one.
1067 *
1068 */
1069 kern_return_t
1070 thread_wakeup_prim(
1071 event_t event,
1072 boolean_t one_thread,
1073 wait_result_t result)
1074 {
1075 register wait_queue_t wq;
1076 register int index;
1077
1078 index = wait_hash(event);
1079 wq = &wait_queues[index];
1080 if (one_thread)
1081 return (wait_queue_wakeup_one(wq, event, result));
1082 else
1083 return (wait_queue_wakeup_all(wq, event, result));
1084 }
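/*
 * Wakeup-side sketch pairing with the wait side: a thread that called
 * assert_wait(event, ...) is roused by thread_wakeup(event) (all waiters),
 * thread_wakeup_one(event) (a single waiter) or thread_wakeup_with_result(),
 * each of which funnels through thread_wakeup_prim() above.  The event
 * argument below is hypothetical.
 */
#if 0
static void
example_wakeup(event_t event)
{
	thread_wakeup(event);		/* wake every thread waiting on event */
}
#endif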
1085
1086 /*
1087 * thread_bind:
1088 *
1089 * Force a thread to execute on the specified processor.
1090 *
1091 * Returns the previous binding. PROCESSOR_NULL means
1092 * not bound.
1093 *
1094 * XXX - DO NOT export this to users - XXX
1095 */
1096 processor_t
1097 thread_bind(
1098 register thread_t thread,
1099 processor_t processor)
1100 {
1101 processor_t prev;
1102 run_queue_t runq = RUN_QUEUE_NULL;
1103 spl_t s;
1104
1105 s = splsched();
1106 thread_lock(thread);
1107 prev = thread->bound_processor;
1108 if (prev != PROCESSOR_NULL)
1109 runq = run_queue_remove(thread);
1110
1111 thread->bound_processor = processor;
1112
1113 if (runq != RUN_QUEUE_NULL)
1114 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1115 thread_unlock(thread);
1116 splx(s);
1117
1118 return (prev);
1119 }
1120
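/*
 * Counters recording how threads get dispatched: to an idle processor
 * (last used, any in the set, or bound), by preempting the current, last
 * or some other processor, and the corresponding realtime cases.  They
 * are updated from thread_setrun() and realtime_schedule_insert() below.
 */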
1121 struct {
1122 uint32_t idle_pset_last,
1123 idle_pset_any,
1124 idle_bound;
1125
1126 uint32_t pset_self,
1127 pset_last,
1128 pset_other,
1129 bound_self,
1130 bound_other;
1131
1132 uint32_t realtime_self,
1133 realtime_last,
1134 realtime_other;
1135
1136 uint32_t missed_realtime,
1137 missed_other;
1138 } dispatch_counts;
1139
1140 /*
1141 * Select a thread for the current processor to run.
1142 *
1143 * May select the current thread, which must be locked.
1144 */
1145 thread_t
1146 thread_select(
1147 register processor_t processor)
1148 {
1149 register thread_t thread;
1150 processor_set_t pset;
1151 boolean_t other_runnable;
1152
1153 /*
1154 * Check for other non-idle runnable threads.
1155 */
1156 pset = processor->processor_set;
1157 thread = processor->active_thread;
1158
1159 /* Update the thread's priority */
1160 if (thread->sched_stamp != sched_tick)
1161 update_priority(thread);
1162
1163 processor->current_pri = thread->sched_pri;
1164
1165 simple_lock(&pset->sched_lock);
1166
1167 other_runnable = processor->runq.count > 0 || pset->runq.count > 0;
1168
1169 if ( thread->state == TH_RUN &&
1170 thread->processor_set == pset &&
1171 (thread->bound_processor == PROCESSOR_NULL ||
1172 thread->bound_processor == processor) ) {
1173 if ( thread->sched_pri >= BASEPRI_RTQUEUES &&
1174 first_timeslice(processor) ) {
1175 if (pset->runq.highq >= BASEPRI_RTQUEUES) {
1176 register run_queue_t runq = &pset->runq;
1177 register queue_t q;
1178
1179 q = runq->queues + runq->highq;
1180 if (((thread_t)q->next)->realtime.deadline <
1181 processor->deadline) {
1182 thread = (thread_t)q->next;
1183 ((queue_entry_t)thread)->next->prev = q;
1184 q->next = ((queue_entry_t)thread)->next;
1185 thread->runq = RUN_QUEUE_NULL;
1186 assert(thread->sched_mode & TH_MODE_PREEMPT);
1187 runq->count--; runq->urgency--;
1188 if (queue_empty(q)) {
1189 if (runq->highq != IDLEPRI)
1190 clrbit(MAXPRI - runq->highq, runq->bitmap);
1191 runq->highq = MAXPRI - ffsbit(runq->bitmap);
1192 }
1193 }
1194 }
1195
1196 processor->deadline = thread->realtime.deadline;
1197
1198 simple_unlock(&pset->sched_lock);
1199
1200 return (thread);
1201 }
1202
1203 if ( (!other_runnable ||
1204 (processor->runq.highq < thread->sched_pri &&
1205 pset->runq.highq < thread->sched_pri)) ) {
1206
1207 /* I am the highest priority runnable (non-idle) thread */
1208
1209 processor->deadline = UINT64_MAX;
1210
1211 simple_unlock(&pset->sched_lock);
1212
1213 return (thread);
1214 }
1215 }
1216
1217 if (other_runnable)
1218 thread = choose_thread(pset, processor);
1219 else {
1220 /*
1221 * Nothing is runnable, so set this processor idle if it
1222 * was running. Return its idle thread.
1223 */
1224 if (processor->state == PROCESSOR_RUNNING) {
1225 remqueue(&pset->active_queue, (queue_entry_t)processor);
1226 processor->state = PROCESSOR_IDLE;
1227
1228 enqueue_tail(&pset->idle_queue, (queue_entry_t)processor);
1229 pset->idle_count++;
1230 }
1231
1232 processor->deadline = UINT64_MAX;
1233
1234 thread = processor->idle_thread;
1235 }
1236
1237 simple_unlock(&pset->sched_lock);
1238
1239 return (thread);
1240 }
1241
1242 /*
1243 * Perform a context switch and start executing the new thread.
1244 *
1245 * Returns FALSE on failure, and the thread is re-dispatched.
1246 *
1247 * Called at splsched.
1248 */
1249
1250 #define funnel_release_check(thread, debug) \
1251 MACRO_BEGIN \
1252 if ((thread)->funnel_state & TH_FN_OWNED) { \
1253 (thread)->funnel_state = TH_FN_REFUNNEL; \
1254 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
1255 (thread)->funnel_lock, (debug), 0, 0, 0); \
1256 funnel_unlock((thread)->funnel_lock); \
1257 } \
1258 MACRO_END
1259
1260 #define funnel_refunnel_check(thread, debug) \
1261 MACRO_BEGIN \
1262 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
1263 kern_return_t result = (thread)->wait_result; \
1264 \
1265 (thread)->funnel_state = 0; \
1266 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
1267 (thread)->funnel_lock, (debug), 0, 0, 0); \
1268 funnel_lock((thread)->funnel_lock); \
1269 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
1270 (thread)->funnel_lock, (debug), 0, 0, 0); \
1271 (thread)->funnel_state = TH_FN_OWNED; \
1272 (thread)->wait_result = result; \
1273 } \
1274 MACRO_END
1275
1276 boolean_t
1277 thread_invoke(
1278 register thread_t old_thread,
1279 register thread_t new_thread,
1280 ast_t reason)
1281 {
1282 thread_continue_t new_cont, continuation = old_thread->continuation;
1283 void *new_param, *parameter = old_thread->parameter;
1284 processor_t processor;
1285 thread_t prev_thread;
1286
1287 if (get_preemption_level() != 0)
1288 panic("thread_invoke: preemption_level %d\n",
1289 get_preemption_level());
1290
1291 assert(old_thread == current_thread());
1292
1293 /*
1294 * Mark thread interruptible.
1295 */
1296 thread_lock(new_thread);
1297 new_thread->state &= ~TH_UNINT;
1298
1299 assert(thread_runnable(new_thread));
1300
1301 /*
1302 * Allow time constraint threads to hang onto
1303 * a stack.
1304 */
1305 if ( (old_thread->sched_mode & TH_MODE_REALTIME) &&
1306 !old_thread->reserved_stack ) {
1307 old_thread->reserved_stack = old_thread->kernel_stack;
1308 }
1309
1310 if (continuation != NULL) {
1311 if (!new_thread->kernel_stack) {
1312 /*
1313 * If the old thread is using a privileged stack,
1314 * check to see whether we can exchange it with
1315 * that of the new thread.
1316 */
1317 if ( old_thread->kernel_stack == old_thread->reserved_stack &&
1318 !new_thread->reserved_stack)
1319 goto need_stack;
1320
1321 /*
1322 * Context switch by performing a stack handoff.
1323 */
1324 new_cont = new_thread->continuation;
1325 new_thread->continuation = NULL;
1326 new_param = new_thread->parameter;
1327 new_thread->parameter = NULL;
1328
1329 processor = current_processor();
1330 processor->active_thread = new_thread;
1331 processor->current_pri = new_thread->sched_pri;
1332 new_thread->last_processor = processor;
1333 ast_context(new_thread);
1334 thread_unlock(new_thread);
1335
1336 current_task()->csw++;
1337
1338 old_thread->reason = reason;
1339
1340 processor->last_dispatch = mach_absolute_time();
1341 timer_event((uint32_t)processor->last_dispatch,
1342 &new_thread->system_timer);
1343
1344 thread_done(old_thread, new_thread, processor);
1345
1346 machine_stack_handoff(old_thread, new_thread);
1347
1348 thread_begin(new_thread, processor);
1349
1350 /*
1351 * Now dispatch the old thread.
1352 */
1353 thread_dispatch(old_thread);
1354
1355 counter_always(c_thread_invoke_hits++);
1356
1357 funnel_refunnel_check(new_thread, 2);
1358 (void) spllo();
1359
1360 assert(new_cont);
1361 call_continuation(new_cont, new_param, new_thread->wait_result);
1362 /*NOTREACHED*/
1363 }
1364 else
1365 if (new_thread == old_thread) {
1366 /* same thread but with continuation */
1367 counter(++c_thread_invoke_same);
1368 thread_unlock(new_thread);
1369
1370 funnel_refunnel_check(new_thread, 3);
1371 (void) spllo();
1372
1373 call_continuation(continuation, parameter, new_thread->wait_result);
1374 /*NOTREACHED*/
1375 }
1376 }
1377 else {
1378 /*
1379 * Check that the new thread has a stack
1380 */
1381 if (!new_thread->kernel_stack) {
1382 need_stack:
1383 if (!stack_alloc_try(new_thread)) {
1384 counter_always(c_thread_invoke_misses++);
1385 thread_unlock(new_thread);
1386 thread_stack_enqueue(new_thread);
1387 return (FALSE);
1388 }
1389 }
1390 else
1391 if (new_thread == old_thread) {
1392 counter(++c_thread_invoke_same);
1393 thread_unlock(new_thread);
1394 return (TRUE);
1395 }
1396 }
1397
1398 /*
1399 * Context switch by full context save.
1400 */
1401 processor = current_processor();
1402 processor->active_thread = new_thread;
1403 processor->current_pri = new_thread->sched_pri;
1404 new_thread->last_processor = processor;
1405 ast_context(new_thread);
1406 assert(thread_runnable(new_thread));
1407 thread_unlock(new_thread);
1408
1409 counter_always(c_thread_invoke_csw++);
1410 current_task()->csw++;
1411
1412 assert(old_thread->runq == RUN_QUEUE_NULL);
1413 old_thread->reason = reason;
1414
1415 processor->last_dispatch = mach_absolute_time();
1416 timer_event((uint32_t)processor->last_dispatch, &new_thread->system_timer);
1417
1418 thread_done(old_thread, new_thread, processor);
1419
1420 /*
1421 * This is where we actually switch register context,
1422 * and address space if required. Control will not
1423 * return here immediately.
1424 */
1425 prev_thread = machine_switch_context(old_thread, continuation, new_thread);
1426
1427 /*
1428 * We are still old_thread, possibly on a different processor,
1429 * and new_thread is now stale.
1430 */
1431 thread_begin(old_thread, old_thread->last_processor);
1432
1433 /*
1434 * Now dispatch the thread which resumed us.
1435 */
1436 thread_dispatch(prev_thread);
1437
1438 if (continuation) {
1439 funnel_refunnel_check(old_thread, 3);
1440 (void) spllo();
1441
1442 call_continuation(continuation, parameter, old_thread->wait_result);
1443 /*NOTREACHED*/
1444 }
1445
1446 return (TRUE);
1447 }
1448
1449 /*
1450 * thread_done:
1451 *
1452 * Perform calculations for thread
1453 * finishing execution on the current processor.
1454 *
1455 * Called at splsched.
1456 */
1457 void
1458 thread_done(
1459 thread_t old_thread,
1460 thread_t new_thread,
1461 processor_t processor)
1462 {
1463 if (!(old_thread->state & TH_IDLE)) {
1464 /*
1465 * Compute remainder of current quantum.
1466 */
1467 if ( first_timeslice(processor) &&
1468 processor->quantum_end > processor->last_dispatch )
1469 old_thread->current_quantum =
1470 (processor->quantum_end - processor->last_dispatch);
1471 else
1472 old_thread->current_quantum = 0;
1473
1474 if (old_thread->sched_mode & TH_MODE_REALTIME) {
1475 /*
1476 * Cancel the deadline if the thread has
1477 * consumed the entire quantum.
1478 */
1479 if (old_thread->current_quantum == 0) {
1480 old_thread->realtime.deadline = UINT64_MAX;
1481 old_thread->reason |= AST_QUANTUM;
1482 }
1483 }
1484 else {
1485 /*
1486 * For non-realtime threads treat a tiny
1487 * remaining quantum as an expired quantum
1488 * but include what's left next time.
1489 */
1490 if (old_thread->current_quantum < min_std_quantum) {
1491 old_thread->reason |= AST_QUANTUM;
1492 old_thread->current_quantum += std_quantum;
1493 }
1494 }
1495
1496 /*
1497 * If we are doing a direct handoff then
1498 * give the remainder of our quantum to
1499 * the next thread.
1500 */
1501 if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
1502 new_thread->current_quantum = old_thread->current_quantum;
1503 old_thread->reason |= AST_QUANTUM;
1504 old_thread->current_quantum = 0;
1505 }
1506
1507 old_thread->last_switch = processor->last_dispatch;
1508
1509 old_thread->computation_metered +=
1510 (old_thread->last_switch - old_thread->computation_epoch);
1511 }
1512 }
1513
1514 /*
1515 * thread_begin:
1516 *
1517 * Set up for thread beginning execution on
1518 * the current processor.
1519 *
1520 * Called at splsched.
1521 */
1522 void
1523 thread_begin(
1524 thread_t thread,
1525 processor_t processor)
1526 {
1527 if (!(thread->state & TH_IDLE)) {
1528 /*
1529 * Give the thread a new quantum
1530 * if none remaining.
1531 */
1532 if (thread->current_quantum == 0)
1533 thread_quantum_init(thread);
1534
1535 /*
1536 * Set up quantum timer and timeslice.
1537 */
1538 processor->quantum_end =
1539 (processor->last_dispatch + thread->current_quantum);
1540 timer_call_enter1(&processor->quantum_timer,
1541 thread, processor->quantum_end);
1542
1543 processor_timeslice_setup(processor, thread);
1544
1545 thread->last_switch = processor->last_dispatch;
1546
1547 thread->computation_epoch = thread->last_switch;
1548 }
1549 else {
1550 timer_call_cancel(&processor->quantum_timer);
1551 processor->timeslice = 1;
1552 }
1553 }
1554
1555 /*
1556 * thread_dispatch:
1557 *
1558 * Handle previous thread at context switch. Re-dispatch
1559 * if still running, otherwise update run state and perform
1560 * special actions.
1561 *
1562 * Called at splsched.
1563 */
1564 void
1565 thread_dispatch(
1566 register thread_t thread)
1567 {
1568 /*
1569 * If blocked at a continuation, discard
1570 * the stack.
1571 */
1572 #ifndef i386
1573 if (thread->continuation != NULL && thread->kernel_stack)
1574 stack_free(thread);
1575 #endif
1576
1577 if (!(thread->state & TH_IDLE)) {
1578 wake_lock(thread);
1579 thread_lock(thread);
1580
1581 if (!(thread->state & TH_WAIT)) {
1582 /*
1583 * Still running.
1584 */
1585 if (thread->reason & AST_QUANTUM)
1586 thread_setrun(thread, SCHED_TAILQ);
1587 else
1588 if (thread->reason & AST_PREEMPT)
1589 thread_setrun(thread, SCHED_HEADQ);
1590 else
1591 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1592
1593 thread->reason = AST_NONE;
1594
1595 thread_unlock(thread);
1596 wake_unlock(thread);
1597 }
1598 else {
1599 boolean_t wake;
1600
1601 /*
1602 * Waiting.
1603 */
1604 thread->state &= ~TH_RUN;
1605
1606 wake = thread->wake_active;
1607 thread->wake_active = FALSE;
1608
1609 if (thread->sched_mode & TH_MODE_TIMESHARE)
1610 pset_share_decr(thread->processor_set);
1611 pset_run_decr(thread->processor_set);
1612
1613 thread_unlock(thread);
1614 wake_unlock(thread);
1615
1616 if (thread->options & TH_OPT_CALLOUT)
1617 call_thread_block();
1618
1619 if (wake)
1620 thread_wakeup((event_t)&thread->wake_active);
1621
1622 if (thread->state & TH_TERMINATE)
1623 thread_terminate_enqueue(thread);
1624 }
1625 }
1626 }
1627
1628 /*
1629 * thread_block_reason:
1630 *
1631 * Forces a reschedule, blocking the caller if a wait
1632 * has been asserted.
1633 *
1634 * If a continuation is specified, then thread_invoke will
1635 * attempt to discard the thread's kernel stack. When the
1636 * thread resumes, it will execute the continuation function
1637 * on a new kernel stack.
1638 */
1639 counter(mach_counter_t c_thread_block_calls = 0;)
1640
1641 wait_result_t
1642 thread_block_reason(
1643 thread_continue_t continuation,
1644 void *parameter,
1645 ast_t reason)
1646 {
1647 register thread_t self = current_thread();
1648 register processor_t processor;
1649 register thread_t new_thread;
1650 spl_t s;
1651
1652 counter(++c_thread_block_calls);
1653
1654 s = splsched();
1655
1656 if (!(reason & AST_PREEMPT))
1657 funnel_release_check(self, 2);
1658
1659 processor = current_processor();
1660
1661 /*
1662 * Delay switching to the idle thread under certain conditions.
1663 */
1664 if (s != FALSE && (self->state & (TH_IDLE|TH_TERMINATE|TH_WAIT)) == TH_WAIT) {
1665 if ( processor->processor_set->processor_count > 1 &&
1666 processor->processor_set->runq.count == 0 &&
1667 processor->runq.count == 0 )
1668 processor = delay_idle(processor, self);
1669 }
1670
1671 /* If we're explicitly yielding, force a subsequent quantum */
1672 if (reason & AST_YIELD)
1673 processor->timeslice = 0;
1674
1675 /* We're handling all scheduling AST's */
1676 ast_off(AST_SCHEDULING);
1677
1678 self->continuation = continuation;
1679 self->parameter = parameter;
1680
1681 thread_lock(self);
1682 new_thread = thread_select(processor);
1683 assert(new_thread && thread_runnable(new_thread));
1684 thread_unlock(self);
1685 while (!thread_invoke(self, new_thread, reason)) {
1686 thread_lock(self);
1687 new_thread = thread_select(processor);
1688 assert(new_thread && thread_runnable(new_thread));
1689 thread_unlock(self);
1690 }
1691
1692 funnel_refunnel_check(self, 5);
1693 splx(s);
1694
1695 return (self->wait_result);
1696 }
1697
1698 /*
1699 * thread_block:
1700 *
1701 * Block the current thread if a wait has been asserted.
1702 */
1703 wait_result_t
1704 thread_block(
1705 thread_continue_t continuation)
1706 {
1707 return thread_block_reason(continuation, NULL, AST_NONE);
1708 }
1709
1710 wait_result_t
1711 thread_block_parameter(
1712 thread_continue_t continuation,
1713 void *parameter)
1714 {
1715 return thread_block_reason(continuation, parameter, AST_NONE);
1716 }
1717
1718 /*
1719 * thread_run:
1720 *
1721 * Switch directly from the current thread to the
1722 * new thread, handing off our quantum if appropriate.
1723 *
1724 * New thread must be runnable, and not on a run queue.
1725 *
1726 * Called at splsched.
1727 */
1728 int
1729 thread_run(
1730 thread_t self,
1731 thread_continue_t continuation,
1732 void *parameter,
1733 thread_t new_thread)
1734 {
1735 ast_t handoff = AST_HANDOFF;
1736
1737 funnel_release_check(self, 3);
1738
1739 self->continuation = continuation;
1740 self->parameter = parameter;
1741
1742 while (!thread_invoke(self, new_thread, handoff)) {
1743 register processor_t processor = current_processor();
1744
1745 thread_lock(self);
1746 new_thread = thread_select(processor);
1747 thread_unlock(self);
1748 handoff = AST_NONE;
1749 }
1750
1751 funnel_refunnel_check(self, 6);
1752
1753 return (self->wait_result);
1754 }
1755
1756 /*
1757 * thread_continue:
1758 *
1759 * Called at splsched when a thread first receives
1760 * a new stack after a continuation.
1761 */
1762 void
1763 thread_continue(
1764 register thread_t old_thread)
1765 {
1766 register thread_t self = current_thread();
1767 register thread_continue_t continuation;
1768 register void *parameter;
1769
1770 continuation = self->continuation;
1771 self->continuation = NULL;
1772 parameter = self->parameter;
1773 self->parameter = NULL;
1774
1775 thread_begin(self, self->last_processor);
1776
1777 if (old_thread != THREAD_NULL)
1778 thread_dispatch(old_thread);
1779
1780 funnel_refunnel_check(self, 4);
1781
1782 if (old_thread != THREAD_NULL)
1783 (void)spllo();
1784
1785 call_continuation(continuation, parameter, self->wait_result);
1786 /*NOTREACHED*/
1787 }
1788
1789 /*
1790 * Enqueue thread on run queue. Thread must be locked,
1791 * and not already be on a run queue. Returns TRUE
1792 * if a preemption is indicated based on the state
1793 * of the run queue.
1794 *
1795 * Run queue must be locked, see run_queue_remove()
1796 * for more info.
1797 */
1798 static boolean_t
1799 run_queue_enqueue(
1800 register run_queue_t rq,
1801 register thread_t thread,
1802 integer_t options)
1803 {
1804 register int whichq = thread->sched_pri;
1805 register queue_t queue = &rq->queues[whichq];
1806 boolean_t result = FALSE;
1807
1808 assert(whichq >= MINPRI && whichq <= MAXPRI);
1809
1810 assert(thread->runq == RUN_QUEUE_NULL);
1811 if (queue_empty(queue)) {
1812 enqueue_tail(queue, (queue_entry_t)thread);
1813
1814 setbit(MAXPRI - whichq, rq->bitmap);
1815 if (whichq > rq->highq) {
1816 rq->highq = whichq;
1817 result = TRUE;
1818 }
1819 }
1820 else
1821 if (options & SCHED_HEADQ)
1822 enqueue_head(queue, (queue_entry_t)thread);
1823 else
1824 enqueue_tail(queue, (queue_entry_t)thread);
1825
1826 thread->runq = rq;
1827 if (thread->sched_mode & TH_MODE_PREEMPT)
1828 rq->urgency++;
1829 rq->count++;
1830
1831 return (result);
1832 }
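/*
 * Run queue bookkeeping used above: bit (MAXPRI - pri) of rq->bitmap is
 * set while priority level 'pri' has queued threads, rq->highq caches the
 * highest non-empty level, and rq->urgency counts enqueued threads with
 * TH_MODE_PREEMPT set.  The dequeue side (see thread_select()) recomputes
 * highq with ffsbit() when a level drains.
 */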
1833
1834 /*
1835 * Enqueue a thread for realtime execution: like run_queue_enqueue()
1836 * above, but the queue is kept in deadline order and preemption is handled directly.
1837 */
1838 static void
1839 realtime_schedule_insert(
1840 register processor_set_t pset,
1841 register thread_t thread)
1842 {
1843 register run_queue_t rq = &pset->runq;
1844 register int whichq = thread->sched_pri;
1845 register queue_t queue = &rq->queues[whichq];
1846 uint64_t deadline = thread->realtime.deadline;
1847 boolean_t try_preempt = FALSE;
1848
1849 assert(whichq >= BASEPRI_REALTIME && whichq <= MAXPRI);
1850
1851 assert(thread->runq == RUN_QUEUE_NULL);
1852 if (queue_empty(queue)) {
1853 enqueue_tail(queue, (queue_entry_t)thread);
1854
1855 setbit(MAXPRI - whichq, rq->bitmap);
1856 if (whichq > rq->highq)
1857 rq->highq = whichq;
1858 try_preempt = TRUE;
1859 }
1860 else {
1861 register thread_t entry = (thread_t)queue_first(queue);
1862
1863 while (TRUE) {
1864 if ( queue_end(queue, (queue_entry_t)entry) ||
1865 deadline < entry->realtime.deadline ) {
1866 entry = (thread_t)queue_prev((queue_entry_t)entry);
1867 break;
1868 }
1869
1870 entry = (thread_t)queue_next((queue_entry_t)entry);
1871 }
1872
1873 if ((queue_entry_t)entry == queue)
1874 try_preempt = TRUE;
1875
1876 insque((queue_entry_t)thread, (queue_entry_t)entry);
1877 }
1878
1879 thread->runq = rq;
1880 assert(thread->sched_mode & TH_MODE_PREEMPT);
1881 rq->count++; rq->urgency++;
1882
1883 if (try_preempt) {
1884 register processor_t processor;
1885
1886 processor = current_processor();
1887 if ( pset == processor->processor_set &&
1888 (thread->sched_pri > processor->current_pri ||
1889 deadline < processor->deadline ) ) {
1890 dispatch_counts.realtime_self++;
1891 simple_unlock(&pset->sched_lock);
1892
1893 ast_on(AST_PREEMPT | AST_URGENT);
1894 return;
1895 }
1896
1897 if ( pset->processor_count > 1 ||
1898 pset != processor->processor_set ) {
1899 processor_t myprocessor, lastprocessor;
1900 queue_entry_t next;
1901
1902 myprocessor = processor;
1903 processor = thread->last_processor;
1904 if ( processor != myprocessor &&
1905 processor != PROCESSOR_NULL &&
1906 processor->processor_set == pset &&
1907 processor->state == PROCESSOR_RUNNING &&
1908 (thread->sched_pri > processor->current_pri ||
1909 deadline < processor->deadline ) ) {
1910 dispatch_counts.realtime_last++;
1911 cause_ast_check(processor);
1912 simple_unlock(&pset->sched_lock);
1913 return;
1914 }
1915
1916 lastprocessor = processor;
1917 queue = &pset->active_queue;
1918 processor = (processor_t)queue_first(queue);
1919 while (!queue_end(queue, (queue_entry_t)processor)) {
1920 next = queue_next((queue_entry_t)processor);
1921
1922 if ( processor != myprocessor &&
1923 processor != lastprocessor &&
1924 (thread->sched_pri > processor->current_pri ||
1925 deadline < processor->deadline ) ) {
1926 if (!queue_end(queue, next)) {
1927 remqueue(queue, (queue_entry_t)processor);
1928 enqueue_tail(queue, (queue_entry_t)processor);
1929 }
1930 dispatch_counts.realtime_other++;
1931 cause_ast_check(processor);
1932 simple_unlock(&pset->sched_lock);
1933 return;
1934 }
1935
1936 processor = (processor_t)next;
1937 }
1938 }
1939 }
1940
1941 simple_unlock(&pset->sched_lock);
1942 }
1943
1944 /*
1945 * thread_setrun:
1946 *
1947 * Dispatch thread for execution, directly onto an idle
1948 * processor if possible. Else put on appropriate run
1949 * queue. (local if bound, else processor set)
1950 *
1951 * Thread must be locked.
1952 */
1953 void
1954 thread_setrun(
1955 register thread_t new_thread,
1956 integer_t options)
1957 {
1958 register processor_t processor;
1959 register processor_set_t pset;
1960 register thread_t thread;
1961 ast_t preempt = (options & SCHED_PREEMPT)?
1962 AST_PREEMPT: AST_NONE;
1963
1964 assert(thread_runnable(new_thread));
1965
1966 /*
1967 * Update priority if needed.
1968 */
1969 if (new_thread->sched_stamp != sched_tick)
1970 update_priority(new_thread);
1971
1972 /*
1973 * Check for urgent preemption.
1974 */
1975 if (new_thread->sched_mode & TH_MODE_PREEMPT)
1976 preempt = (AST_PREEMPT | AST_URGENT);
1977
1978 assert(new_thread->runq == RUN_QUEUE_NULL);
1979
1980 if ((processor = new_thread->bound_processor) == PROCESSOR_NULL) {
1981 /*
1982 * First try to dispatch on
1983 * the last processor.
1984 */
1985 pset = new_thread->processor_set;
1986 processor = new_thread->last_processor;
1987 if ( pset->processor_count > 1 &&
1988 processor != PROCESSOR_NULL &&
1989 processor->state == PROCESSOR_IDLE ) {
1990 processor_lock(processor);
1991 simple_lock(&pset->sched_lock);
1992 if ( processor->processor_set == pset &&
1993 processor->state == PROCESSOR_IDLE ) {
1994 remqueue(&pset->idle_queue, (queue_entry_t)processor);
1995 pset->idle_count--;
1996 processor->next_thread = new_thread;
1997 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
1998 processor->deadline = new_thread->realtime.deadline;
1999 else
2000 processor->deadline = UINT64_MAX;
2001 processor->state = PROCESSOR_DISPATCHING;
2002 dispatch_counts.idle_pset_last++;
2003 simple_unlock(&pset->sched_lock);
2004 processor_unlock(processor);
2005 if (processor != current_processor())
2006 machine_signal_idle(processor);
2007 return;
2008 }
2009 processor_unlock(processor);
2010 }
2011 else
2012 simple_lock(&pset->sched_lock);
2013
2014 /*
2015 * Next pick any idle processor
2016 * in the processor set.
2017 */
2018 if (pset->idle_count > 0) {
2019 processor = (processor_t)dequeue_head(&pset->idle_queue);
2020 pset->idle_count--;
2021 processor->next_thread = new_thread;
2022 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2023 processor->deadline = new_thread->realtime.deadline;
2024 else
2025 processor->deadline = UINT64_MAX;
2026 processor->state = PROCESSOR_DISPATCHING;
2027 dispatch_counts.idle_pset_any++;
2028 simple_unlock(&pset->sched_lock);
2029 if (processor != current_processor())
2030 machine_signal_idle(processor);
2031 return;
2032 }
2033
2034 if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
2035 realtime_schedule_insert(pset, new_thread);
2036 else {
2037 if (!run_queue_enqueue(&pset->runq, new_thread, options))
2038 preempt = AST_NONE;
2039
2040 /*
2041 * Update the timesharing quanta.
2042 */
2043 timeshare_quanta_update(pset);
2044
2045 /*
2046 * Preempt check.
2047 */
2048 if (preempt != AST_NONE) {
2049 /*
2050 * First try the current processor
2051 * if it is a member of the correct
2052 * processor set.
2053 */
2054 processor = current_processor();
2055 thread = processor->active_thread;
2056 if ( pset == processor->processor_set &&
2057 csw_needed(thread, processor) ) {
2058 dispatch_counts.pset_self++;
2059 simple_unlock(&pset->sched_lock);
2060
2061 ast_on(preempt);
2062 return;
2063 }
2064
2065 /*
2066 * If that failed and we have other
2067 * processors available keep trying.
2068 */
2069 if ( pset->processor_count > 1 ||
2070 pset != processor->processor_set ) {
2071 queue_t queue = &pset->active_queue;
2072 processor_t myprocessor, lastprocessor;
2073 queue_entry_t next;
2074
2075 /*
2076 * Next try the last processor
2077 * dispatched on.
2078 */
2079 myprocessor = processor;
2080 processor = new_thread->last_processor;
2081 if ( processor != myprocessor &&
2082 processor != PROCESSOR_NULL &&
2083 processor->processor_set == pset &&
2084 processor->state == PROCESSOR_RUNNING &&
2085 new_thread->sched_pri > processor->current_pri ) {
2086 dispatch_counts.pset_last++;
2087 cause_ast_check(processor);
2088 simple_unlock(&pset->sched_lock);
2089 return;
2090 }
2091
2092 /*
2093 * Lastly, pick any other
2094 * available processor.
2095 */
2096 lastprocessor = processor;
2097 processor = (processor_t)queue_first(queue);
2098 while (!queue_end(queue, (queue_entry_t)processor)) {
2099 next = queue_next((queue_entry_t)processor);
2100
2101 if ( processor != myprocessor &&
2102 processor != lastprocessor &&
2103 new_thread->sched_pri >
2104 processor->current_pri ) {
2105 if (!queue_end(queue, next)) {
2106 remqueue(queue, (queue_entry_t)processor);
2107 enqueue_tail(queue, (queue_entry_t)processor);
2108 }
2109 dispatch_counts.pset_other++;
2110 cause_ast_check(processor);
2111 simple_unlock(&pset->sched_lock);
2112 return;
2113 }
2114
2115 processor = (processor_t)next;
2116 }
2117 }
2118 }
2119
2120 simple_unlock(&pset->sched_lock);
2121 }
2122 }
2123 else {
2124 /*
2125 * Bound, can only run on bound processor. Have to lock
2126 * processor here because it may not be the current one.
2127 */
2128 processor_lock(processor);
2129 pset = processor->processor_set;
2130 if (pset != PROCESSOR_SET_NULL) {
2131 simple_lock(&pset->sched_lock);
2132 if (processor->state == PROCESSOR_IDLE) {
2133 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2134 pset->idle_count--;
2135 processor->next_thread = new_thread;
2136 processor->deadline = UINT64_MAX;
2137 processor->state = PROCESSOR_DISPATCHING;
2138 dispatch_counts.idle_bound++;
2139 simple_unlock(&pset->sched_lock);
2140 processor_unlock(processor);
2141 if (processor != current_processor())
2142 machine_signal_idle(processor);
2143 return;
2144 }
2145 }
2146
2147 if (!run_queue_enqueue(&processor->runq, new_thread, options))
2148 preempt = AST_NONE;
2149
2150 if (preempt != AST_NONE) {
2151 if (processor == current_processor()) {
2152 thread = processor->active_thread;
2153 if (csw_needed(thread, processor)) {
2154 dispatch_counts.bound_self++;
2155 ast_on(preempt);
2156 }
2157 }
2158 else
2159 if ( processor->state == PROCESSOR_RUNNING &&
2160 new_thread->sched_pri > processor->current_pri ) {
2161 dispatch_counts.bound_other++;
2162 cause_ast_check(processor);
2163 }
2164 }
2165
2166 if (pset != PROCESSOR_SET_NULL)
2167 simple_unlock(&pset->sched_lock);
2168
2169 processor_unlock(processor);
2170 }
2171 }
2172
2173 /*
2174 * Check for a possible preemption point in
2175 * the (current) thread.
2176 *
2177 * Called at splsched.
2178 */
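/*
 * Note on the comparisons below: while the processor is still in the
 * first timeslice of its quantum, only a strictly higher-priority
 * runnable thread (or any realtime thread) triggers preemption; once
 * the first timeslice has expired, an equal-priority thread in either
 * run queue is also sufficient, which round-robins threads of the
 * same priority.
 */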
2179 ast_t
2180 csw_check(
2181 thread_t thread,
2182 processor_t processor)
2183 {
2184 int current_pri = thread->sched_pri;
2185 ast_t result = AST_NONE;
2186 run_queue_t runq;
2187
2188 if (first_timeslice(processor)) {
2189 runq = &processor->processor_set->runq;
2190 if (runq->highq >= BASEPRI_RTQUEUES)
2191 return (AST_PREEMPT | AST_URGENT);
2192
2193 if (runq->highq > current_pri) {
2194 if (runq->urgency > 0)
2195 return (AST_PREEMPT | AST_URGENT);
2196
2197 result |= AST_PREEMPT;
2198 }
2199
2200 runq = &processor->runq;
2201 if (runq->highq > current_pri) {
2202 if (runq->urgency > 0)
2203 return (AST_PREEMPT | AST_URGENT);
2204
2205 result |= AST_PREEMPT;
2206 }
2207 }
2208 else {
2209 runq = &processor->processor_set->runq;
2210 if (runq->highq >= current_pri) {
2211 if (runq->urgency > 0)
2212 return (AST_PREEMPT | AST_URGENT);
2213
2214 result |= AST_PREEMPT;
2215 }
2216
2217 runq = &processor->runq;
2218 if (runq->highq >= current_pri) {
2219 if (runq->urgency > 0)
2220 return (AST_PREEMPT | AST_URGENT);
2221
2222 result |= AST_PREEMPT;
2223 }
2224 }
2225
2226 if (result != AST_NONE)
2227 return (result);
2228
2229 if (thread->state & TH_SUSP)
2230 result |= AST_PREEMPT;
2231
2232 return (result);
2233 }
2234
2235 /*
2236 * set_sched_pri:
2237 *
2238 * Set the scheduled priority of the specified thread.
2239 *
2240 * This may cause the thread to change queues.
2241 *
2242 * Thread must be locked.
2243 */
2244 void
2245 set_sched_pri(
2246 thread_t thread,
2247 int priority)
2248 {
2249 register struct run_queue *rq = run_queue_remove(thread);
2250
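	/*
	 * Mark the thread as urgent (TH_MODE_PREEMPT) when it is not
	 * timesharing and either the new priority is in the preemption
	 * band (>= BASEPRI_PREEMPT), or the task priority lies in the
	 * normal user range and the thread is being elevated above it.
	 * The urgent flag feeds the run queue urgency counts consulted
	 * by csw_check().
	 */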
2251 if ( !(thread->sched_mode & TH_MODE_TIMESHARE) &&
2252 (priority >= BASEPRI_PREEMPT ||
2253 (thread->task_priority < MINPRI_KERNEL &&
2254 thread->task_priority >= BASEPRI_BACKGROUND &&
2255 priority > thread->task_priority) ) )
2256 thread->sched_mode |= TH_MODE_PREEMPT;
2257 else
2258 thread->sched_mode &= ~TH_MODE_PREEMPT;
2259
2260 thread->sched_pri = priority;
2261 if (rq != RUN_QUEUE_NULL)
2262 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2263 else
2264 if (thread->state & TH_RUN) {
2265 processor_t processor = thread->last_processor;
2266
2267 if (thread == current_thread()) {
2268 ast_t preempt = csw_check(thread, processor);
2269
2270 if (preempt != AST_NONE)
2271 ast_on(preempt);
2272 processor->current_pri = priority;
2273 }
2274 else
2275 if ( processor != PROCESSOR_NULL &&
2276 processor->active_thread == thread )
2277 cause_ast_check(processor);
2278 }
2279 }
2280
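/*
 * Illustrative sketch only (not compiled, in the spirit of the
 * disabled run_queue_check() block below): a typical caller of
 * set_sched_pri() takes splsched and the thread lock first, as
 * required above.  The function name example_set_pri is hypothetical.
 */
#if 0

static void
example_set_pri(
	thread_t		thread,
	int			priority)
{
	spl_t		s;

	s = splsched();
	thread_lock(thread);
	set_sched_pri(thread, priority);
	thread_unlock(thread);
	splx(s);
}

#endif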
2281 #if 0
2282
2283 static void
2284 run_queue_check(
2285 run_queue_t rq,
2286 thread_t thread)
2287 {
2288 queue_t q;
2289 queue_entry_t qe;
2290
2291 if (rq != thread->runq)
2292 panic("run_queue_check: thread runq");
2293
2294 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
2295 panic("run_queue_check: thread sched_pri");
2296
2297 q = &rq->queues[thread->sched_pri];
2298 qe = queue_first(q);
2299 while (!queue_end(q, qe)) {
2300 if (qe == (queue_entry_t)thread)
2301 return;
2302
2303 qe = queue_next(qe);
2304 }
2305
2306 panic("run_queue_check: end");
2307 }
2308
2309 #endif /* 0 */
2310
2311 /*
2312 * run_queue_remove:
2313 *
2314 * Remove a thread from its current run queue and
2315 * return the run queue if successful.
2316 *
2317 * Thread must be locked.
2318 */
2319 run_queue_t
2320 run_queue_remove(
2321 thread_t thread)
2322 {
2323 register run_queue_t rq = thread->runq;
2324
2325 /*
2326 * If rq is RUN_QUEUE_NULL, the thread will stay out of the
2327 * run queues because the caller locked the thread. Otherwise
2328 * the thread is on a run queue, but could be chosen for dispatch
2329 * and removed.
2330 */
2331 if (rq != RUN_QUEUE_NULL) {
2332 processor_set_t pset = thread->processor_set;
2333 processor_t processor = thread->bound_processor;
2334
2335 /*
2336 * The run queues are locked by the pset scheduling
2337 * lock, except that when a processor is off-line its
2338 * local run queue is locked by the processor lock.
2339 */
2340 if (processor != PROCESSOR_NULL) {
2341 processor_lock(processor);
2342 pset = processor->processor_set;
2343 }
2344
2345 if (pset != PROCESSOR_SET_NULL)
2346 simple_lock(&pset->sched_lock);
2347
2348 if (rq == thread->runq) {
2349 /*
2350 * Thread is on a run queue and we have a lock on
2351 * that run queue.
2352 */
2353 remqueue(&rq->queues[0], (queue_entry_t)thread);
2354 rq->count--;
2355 if (thread->sched_mode & TH_MODE_PREEMPT)
2356 rq->urgency--;
2357 assert(rq->urgency >= 0);
2358
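		/*
		 * The run queue bitmap is indexed by (MAXPRI - priority),
		 * so the lowest set bit corresponds to the highest runnable
		 * priority; clearing the bit for an emptied queue and taking
		 * ffsbit() keeps rq->highq current.
		 */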
2359 if (queue_empty(rq->queues + thread->sched_pri)) {
2360 /* update run queue status */
2361 if (thread->sched_pri != IDLEPRI)
2362 clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
2363 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2364 }
2365
2366 thread->runq = RUN_QUEUE_NULL;
2367 }
2368 else {
2369 /*
2370 * The thread left the run queue before we could
2371 * lock the run queue.
2372 */
2373 assert(thread->runq == RUN_QUEUE_NULL);
2374 rq = RUN_QUEUE_NULL;
2375 }
2376
2377 if (pset != PROCESSOR_SET_NULL)
2378 simple_unlock(&pset->sched_lock);
2379
2380 if (processor != PROCESSOR_NULL)
2381 processor_unlock(processor);
2382 }
2383
2384 return (rq);
2385 }
2386
2387 /*
2388 * choose_thread:
2389 *
2390 * Choose a thread to execute, remove it from the run queues,
2391 * and return it.
2392 *
2393 * Called with pset scheduling lock held.
2394 */
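/*
 * The processor's local run queue is preferred whenever it is
 * non-empty and its highest priority is at least that of the pset
 * run queue; otherwise the thread comes from the pset run queue,
 * and a realtime selection also sets the processor deadline from
 * the chosen thread.
 */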
2395 static thread_t
2396 choose_thread(
2397 processor_set_t pset,
2398 processor_t processor)
2399 {
2400 register run_queue_t runq;
2401 register thread_t thread;
2402 register queue_t q;
2403
2404 runq = &processor->runq;
2405
2406 if (runq->count > 0 && runq->highq >= pset->runq.highq) {
2407 q = runq->queues + runq->highq;
2408
2409 thread = (thread_t)q->next;
2410 ((queue_entry_t)thread)->next->prev = q;
2411 q->next = ((queue_entry_t)thread)->next;
2412 thread->runq = RUN_QUEUE_NULL;
2413 runq->count--;
2414 if (thread->sched_mode & TH_MODE_PREEMPT)
2415 runq->urgency--;
2416 assert(runq->urgency >= 0);
2417 if (queue_empty(q)) {
2418 if (runq->highq != IDLEPRI)
2419 clrbit(MAXPRI - runq->highq, runq->bitmap);
2420 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2421 }
2422
2423 processor->deadline = UINT64_MAX;
2424
2425 return (thread);
2426 }
2427
2428 runq = &pset->runq;
2429
2430 assert(runq->count > 0);
2431 q = runq->queues + runq->highq;
2432
2433 thread = (thread_t)q->next;
2434 ((queue_entry_t)thread)->next->prev = q;
2435 q->next = ((queue_entry_t)thread)->next;
2436 thread->runq = RUN_QUEUE_NULL;
2437 runq->count--;
2438 if (runq->highq >= BASEPRI_RTQUEUES)
2439 processor->deadline = thread->realtime.deadline;
2440 else
2441 processor->deadline = UINT64_MAX;
2442 if (thread->sched_mode & TH_MODE_PREEMPT)
2443 runq->urgency--;
2444 assert(runq->urgency >= 0);
2445 if (queue_empty(q)) {
2446 if (runq->highq != IDLEPRI)
2447 clrbit(MAXPRI - runq->highq, runq->bitmap);
2448 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2449 }
2450
2451 timeshare_quanta_update(pset);
2452
2453 return (thread);
2454 }
2455
2456 static processor_t
2457 delay_idle(
2458 processor_t processor,
2459 thread_t self)
2460 {
2461 int *gcount, *lcount;
2462 uint64_t abstime, spin, limit;
2463
2464 lcount = &processor->runq.count;
2465 gcount = &processor->processor_set->runq.count;
2466
2467 abstime = mach_absolute_time();
2468 limit = abstime + delay_idle_limit;
2469 spin = abstime + delay_idle_spin;
2470
2471 timer_event((uint32_t)abstime, &processor->idle_thread->system_timer);
2472
2473 self->options |= TH_OPT_DELAYIDLE;
2474
2475 while ( *gcount == 0 && *lcount == 0 &&
2476 (self->state & TH_WAIT) != 0 &&
2477 abstime < limit ) {
2478 if (abstime >= spin) {
2479 (void)spllo();
2480
2481 (void)splsched();
2482 processor = current_processor();
2483 lcount = &processor->runq.count;
2484 gcount = &processor->processor_set->runq.count;
2485
2486 abstime = mach_absolute_time();
2487 spin = abstime + delay_idle_spin;
2488
2489 timer_event((uint32_t)abstime, &processor->idle_thread->system_timer);
2490 }
2491 else
2492 abstime = mach_absolute_time();
2493 }
2494
2495 timer_event((uint32_t)abstime, &self->system_timer);
2496
2497 self->options &= ~TH_OPT_DELAYIDLE;
2498
2499 return (processor);
2500 }
2501
2502 /*
2503 * no_dispatch_count counts the number of times a processor goes non-idle
2504 * without being dispatched. This should be very rare.
2505 */
2506 int no_dispatch_count = 0;
2507
2508 /*
2509 * This is the idle processor thread, which just looks for other threads
2510 * to execute.
2511 */
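/*
 * The loop below spins (dropping into machine_idle()) until either a
 * thread has been handed off via processor->next_thread or work
 * appears in the local or pset run queue, then acts on the processor
 * state: PROCESSOR_DISPATCHING runs the handed-off thread (unless a
 * better choice is queued), PROCESSOR_IDLE simply reschedules, and
 * PROCESSOR_SHUTDOWN requeues any handed-off thread before blocking.
 */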
2512 void
2513 idle_thread(void)
2514 {
2515 register processor_t processor;
2516 register thread_t *threadp;
2517 register int *gcount;
2518 register int *lcount;
2519 register thread_t new_thread;
2520 register int state;
2521 register processor_set_t pset;
2522 ast_t *myast = ast_pending();
2523
2524 processor = current_processor();
2525
2526 threadp = &processor->next_thread;
2527 lcount = &processor->runq.count;
2528 gcount = &processor->processor_set->runq.count;
2529
2530
2531 (void)splsched(); /* Turn interruptions off */
2532
2533 #ifdef __ppc__
2534 pmsDown(); /* Step power down. Note: interruptions must be disabled for this call */
2535 #endif
2536
2537 while ( (*threadp == THREAD_NULL) &&
2538 (*gcount == 0) && (*lcount == 0) ) {
2539
2540 /* check for ASTs while we wait */
2541 if (*myast &~ (AST_SCHEDULING | AST_BSD)) {
2542 /* no ASTs for us */
2543 *myast &= AST_NONE;
2544 (void)spllo();
2545 }
2546 else
2547 machine_idle();
2548
2549 (void)splsched();
2550 }
2551
2552 /*
2553 * The state checks below use an if/else chain rather than a
2554 * switch statement to avoid the bounds-checking code in the common case.
2555 */
2556 pset = processor->processor_set;
2557 simple_lock(&pset->sched_lock);
2558
2559 #ifdef __ppc__
2560 pmsStep(0); /* Step up out of idle power, may start timer for next step */
2561 #endif
2562
2563 state = processor->state;
2564 if (state == PROCESSOR_DISPATCHING) {
2565 /*
2566 * Common case -- cpu dispatched.
2567 */
2568 new_thread = *threadp;
2569 *threadp = (volatile thread_t) THREAD_NULL;
2570 processor->state = PROCESSOR_RUNNING;
2571 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2572
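		/*
		 * If both the handed-off thread and the head of the pset
		 * run queue are realtime, and the queued thread has an
		 * earlier deadline than the one recorded for this dispatch,
		 * run the queued thread instead and send the handed-off
		 * thread back to the head of a run queue.
		 */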
2573 if ( pset->runq.highq >= BASEPRI_RTQUEUES &&
2574 new_thread->sched_pri >= BASEPRI_RTQUEUES ) {
2575 register run_queue_t runq = &pset->runq;
2576 register queue_t q;
2577
2578 q = runq->queues + runq->highq;
2579 if (((thread_t)q->next)->realtime.deadline <
2580 processor->deadline) {
2581 thread_t thread = new_thread;
2582
2583 new_thread = (thread_t)q->next;
2584 ((queue_entry_t)new_thread)->next->prev = q;
2585 q->next = ((queue_entry_t)new_thread)->next;
2586 new_thread->runq = RUN_QUEUE_NULL;
2587 processor->deadline = new_thread->realtime.deadline;
2588 assert(new_thread->sched_mode & TH_MODE_PREEMPT);
2589 runq->count--; runq->urgency--;
2590 if (queue_empty(q)) {
2591 if (runq->highq != IDLEPRI)
2592 clrbit(MAXPRI - runq->highq, runq->bitmap);
2593 runq->highq = MAXPRI - ffsbit(runq->bitmap);
2594 }
2595 dispatch_counts.missed_realtime++;
2596 simple_unlock(&pset->sched_lock);
2597
2598 thread_lock(thread);
2599 thread_setrun(thread, SCHED_HEADQ);
2600 thread_unlock(thread);
2601
2602 counter(c_idle_thread_handoff++);
2603 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2604 /*NOTREACHED*/
2605 }
2606 simple_unlock(&pset->sched_lock);
2607
2608 counter(c_idle_thread_handoff++);
2609 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2610 /*NOTREACHED*/
2611 }
2612
2613 if ( processor->runq.highq > new_thread->sched_pri ||
2614 pset->runq.highq > new_thread->sched_pri ) {
2615 thread_t thread = new_thread;
2616
2617 new_thread = choose_thread(pset, processor);
2618 dispatch_counts.missed_other++;
2619 simple_unlock(&pset->sched_lock);
2620
2621 thread_lock(thread);
2622 thread_setrun(thread, SCHED_HEADQ);
2623 thread_unlock(thread);
2624
2625 counter(c_idle_thread_handoff++);
2626 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2627 /* NOTREACHED */
2628 }
2629 else {
2630 simple_unlock(&pset->sched_lock);
2631
2632 counter(c_idle_thread_handoff++);
2633 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2634 /* NOTREACHED */
2635 }
2636 }
2637 else
2638 if (state == PROCESSOR_IDLE) {
2639 /*
2640 * Processor was not dispatched (Rare).
2641 * Set it running again and force a
2642 * reschedule.
2643 */
2644 no_dispatch_count++;
2645 pset->idle_count--;
2646 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2647 processor->state = PROCESSOR_RUNNING;
2648 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2649 simple_unlock(&pset->sched_lock);
2650
2651 counter(c_idle_thread_block++);
2652 thread_block((thread_continue_t)idle_thread);
2653 /* NOTREACHED */
2654 }
2655 else
2656 if (state == PROCESSOR_SHUTDOWN) {
2657 /*
2658 * Going off-line. Force a
2659 * reschedule.
2660 */
2661 if ((new_thread = (thread_t)*threadp) != THREAD_NULL) {
2662 *threadp = (volatile thread_t) THREAD_NULL;
2663 processor->deadline = UINT64_MAX;
2664 simple_unlock(&pset->sched_lock);
2665
2666 thread_lock(new_thread);
2667 thread_setrun(new_thread, SCHED_HEADQ);
2668 thread_unlock(new_thread);
2669 }
2670 else
2671 simple_unlock(&pset->sched_lock);
2672
2673 counter(c_idle_thread_block++);
2674 thread_block((thread_continue_t)idle_thread);
2675 /* NOTREACHED */
2676 }
2677
2678 simple_unlock(&pset->sched_lock);
2679
2680 panic("idle_thread: state %d\n", processor->state);
2681 /*NOTREACHED*/
2682 }
2683
2684 kern_return_t
2685 idle_thread_create(
2686 processor_t processor)
2687 {
2688 kern_return_t result;
2689 thread_t thread;
2690 spl_t s;
2691
2692 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
2693 if (result != KERN_SUCCESS)
2694 return (result);
2695
2696 s = splsched();
2697 thread_lock(thread);
2698 thread->bound_processor = processor;
2699 processor->idle_thread = thread;
2700 thread->sched_pri = thread->priority = IDLEPRI;
2701 thread->state = (TH_RUN | TH_IDLE);
2702 thread_unlock(thread);
2703 splx(s);
2704
2705 thread_deallocate(thread);
2706
2707 return (KERN_SUCCESS);
2708 }
2709
2710 static uint64_t sched_tick_deadline;
2711
2712 /*
2713 * sched_startup:
2714 *
2715 * Kicks off scheduler services.
2716 *
2717 * Called at splsched.
2718 */
2719 void
2720 sched_startup(void)
2721 {
2722 kern_return_t result;
2723 thread_t thread;
2724
2725 result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
2726 if (result != KERN_SUCCESS)
2727 panic("sched_startup");
2728
2729 thread_deallocate(thread);
2730
2731 /*
2732 * Yield to the sched_tick_thread while it times
2733 * a series of context switches back. It stores
2734 * the baseline value in sched_cswtime.
2735 *
2736 * The current thread is the only other thread
2737 * active at this point.
2738 */
2739 while (sched_cswtime == 0)
2740 thread_block(THREAD_CONTINUE_NULL);
2741
2742 thread_daemon_init();
2743
2744 thread_call_initialize();
2745 }
2746
2747 /*
2748 * sched_tick_thread / sched_tick_continue:
2749 *
2750 * Perform periodic bookkeeping functions about ten
2751 * times per second.
2752 */
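/*
 * sched_tick_continue() below is the recurring body: it advances
 * sched_tick, recomputes the load averages, scans the run queues,
 * then arms the next periodic deadline and blocks with itself as
 * the continuation so the kernel stack need not be retained
 * between ticks.
 */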
2753 static void
2754 sched_tick_continue(void)
2755 {
2756 uint64_t abstime = mach_absolute_time();
2757
2758 sched_tick++;
2759
2760 /*
2761 * Compute various averages.
2762 */
2763 compute_averages();
2764
2765 /*
2766 * Scan the run queues for threads which
2767 * may need to be updated.
2768 */
2769 thread_update_scan();
2770
2771 clock_deadline_for_periodic_event(sched_tick_interval, abstime,
2772 &sched_tick_deadline);
2773
2774 assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
2775 thread_block((thread_continue_t)sched_tick_continue);
2776 /*NOTREACHED*/
2777 }
2778
2779 /*
2780 * Time a series of context switches to determine
2781 * a baseline. Toss the high and low and return
2782 * the one-way value.
2783 */
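/*
 * Worked example: with tries == 7, accum holds seven round-trip
 * samples; subtracting the high and the low leaves tries - 2 == 5
 * samples, and dividing by 2 * (tries - 2) == 10 both averages
 * those samples and halves the result to a one-way time.
 */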
2784 static uint32_t
2785 time_cswitch(void)
2786 {
2787 uint32_t new, hi, low, accum;
2788 uint64_t abstime;
2789 int i, tries = 7;
2790
2791 accum = hi = low = 0;
2792 for (i = 0; i < tries; ++i) {
2793 abstime = mach_absolute_time();
2794 thread_block(THREAD_CONTINUE_NULL);
2795
2796 new = mach_absolute_time() - abstime;
2797
2798 if (i == 0)
2799 accum = hi = low = new;
2800 else {
2801 if (new < low)
2802 low = new;
2803 else
2804 if (new > hi)
2805 hi = new;
2806 accum += new;
2807 }
2808 }
2809
2810 return ((accum - hi - low) / (2 * (tries - 2)));
2811 }
2812
2813 void
2814 sched_tick_thread(void)
2815 {
2816 sched_cswtime = time_cswitch();
2817
2818 sched_tick_deadline = mach_absolute_time();
2819
2820 sched_tick_continue();
2821 /*NOTREACHED*/
2822 }
2823
2824 /*
2825 * thread_update_scan / runq_scan:
2826 *
2827 * Scan the run queues to account for timesharing threads
2828 * which need to be updated.
2829 *
2830 * Scanner runs in two passes. Pass one squirrels likely
2831 * threads away in an array, pass two does the update.
2832 *
2833 * This is necessary because the run queue is locked for
2834 * the candidate scan, but the thread is locked for the update.
2835 *
2836 * Array should be sized to make forward progress, without
2837 * disabling preemption for long periods.
2838 */
2839
2840 #define THREAD_UPDATE_SIZE 128
2841
2842 static thread_t thread_update_array[THREAD_UPDATE_SIZE];
2843 static int thread_update_count = 0;
2844
2845 /*
2846 * Scan a runq for candidate threads.
2847 *
2848 * Returns TRUE if retry is needed.
2849 */
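/*
 * Candidates are timesharing threads whose sched_stamp lags the
 * current sched_tick.  Each one gets a reference here so it remains
 * valid after the run queue lock is dropped; thread_update_scan()
 * drops that reference after calling update_priority().
 */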
2850 static boolean_t
2851 runq_scan(
2852 run_queue_t runq)
2853 {
2854 register int count;
2855 register queue_t q;
2856 register thread_t thread;
2857
2858 if ((count = runq->count) > 0) {
2859 q = runq->queues + runq->highq;
2860 while (count > 0) {
2861 queue_iterate(q, thread, thread_t, links) {
2862 if ( thread->sched_stamp != sched_tick &&
2863 (thread->sched_mode & TH_MODE_TIMESHARE) ) {
2864 if (thread_update_count == THREAD_UPDATE_SIZE)
2865 return (TRUE);
2866
2867 thread_update_array[thread_update_count++] = thread;
2868 thread_reference_internal(thread);
2869 }
2870
2871 count--;
2872 }
2873
2874 q--;
2875 }
2876 }
2877
2878 return (FALSE);
2879 }
2880
2881 static void
2882 thread_update_scan(void)
2883 {
2884 register boolean_t restart_needed;
2885 register processor_set_t pset = &default_pset;
2886 register processor_t processor;
2887 register thread_t thread;
2888 spl_t s;
2889
2890 do {
2891 s = splsched();
2892 simple_lock(&pset->sched_lock);
2893 restart_needed = runq_scan(&pset->runq);
2894 simple_unlock(&pset->sched_lock);
2895
2896 if (!restart_needed) {
2897 simple_lock(&pset->sched_lock);
2898 processor = (processor_t)queue_first(&pset->processors);
2899 while (!queue_end(&pset->processors, (queue_entry_t)processor)) {
2900 if ((restart_needed = runq_scan(&processor->runq)) != 0)
2901 break;
2902
2903 thread = processor->idle_thread;
2904 if (thread->sched_stamp != sched_tick) {
2905 if (thread_update_count == THREAD_UPDATE_SIZE) {
2906 restart_needed = TRUE;
2907 break;
2908 }
2909
2910 thread_update_array[thread_update_count++] = thread;
2911 thread_reference_internal(thread);
2912 }
2913
2914 processor = (processor_t)queue_next(&processor->processors);
2915 }
2916 simple_unlock(&pset->sched_lock);
2917 }
2918 splx(s);
2919
2920 /*
2921 * Ok, we now have a collection of candidates -- fix them.
2922 */
2923 while (thread_update_count > 0) {
2924 thread = thread_update_array[--thread_update_count];
2925 thread_update_array[thread_update_count] = THREAD_NULL;
2926
2927 s = splsched();
2928 thread_lock(thread);
2929 if ( !(thread->state & (TH_WAIT|TH_SUSP)) &&
2930 thread->sched_stamp != sched_tick )
2931 update_priority(thread);
2932 thread_unlock(thread);
2933 splx(s);
2934
2935 thread_deallocate(thread);
2936 }
2937 } while (restart_needed);
2938 }
2939
2940 /*
2941 * Just in case someone doesn't use the macro
2942 */
2943 #undef thread_wakeup
2944 void
2945 thread_wakeup(
2946 event_t x);
2947
2948 void
2949 thread_wakeup(
2950 event_t x)
2951 {
2952 thread_wakeup_with_result(x, THREAD_AWAKENED);
2953 }
2954
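/*
 * Illustrative sketch only (not compiled): the usual pairing of a
 * wait with a wakeup on the same event, using assert_wait() and
 * thread_block() as elsewhere in this file.  The names example_event,
 * example_wait and example_wake are hypothetical.
 */
#if 0

static int	example_event;

static void
example_wait(void)
{
	assert_wait((event_t)&example_event, THREAD_UNINT);
	thread_block(THREAD_CONTINUE_NULL);
}

static void
example_wake(void)
{
	thread_wakeup((event_t)&example_event);
}

#endif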
2955 boolean_t
2956 preemption_enabled(void)
2957 {
2958 return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
2959 }
2960
2961 #if DEBUG
2962 static boolean_t
2963 thread_runnable(
2964 thread_t thread)
2965 {
2966 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
2967 }
2968 #endif /* DEBUG */
2969
2970 #if MACH_KDB
2971 #include <ddb/db_output.h>
2972 #define printf kdbprintf
2973 void db_sched(void);
2974
2975 void
2976 db_sched(void)
2977 {
2978 iprintf("Scheduling Statistics:\n");
2979 db_indent += 2;
2980 iprintf("Thread invocations: csw %d same %d\n",
2981 c_thread_invoke_csw, c_thread_invoke_same);
2982 #if MACH_COUNTERS
2983 iprintf("Thread block: calls %d\n",
2984 c_thread_block_calls);
2985 iprintf("Idle thread:\n\thandoff %d block %d no_dispatch %d\n",
2986 c_idle_thread_handoff,
2987 c_idle_thread_block, no_dispatch_count);
2988 iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
2989 #endif /* MACH_COUNTERS */
2990 db_indent -= 2;
2991 }
2992
2993 #include <ddb/db_output.h>
2994 void db_show_thread_log(void);
2995
2996 void
2997 db_show_thread_log(void)
2998 {
2999 }
3000 #endif /* MACH_KDB */