1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: sched_prim.c
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1986
62 *
63 * Scheduling primitives
64 *
65 */
66
67 #include <debug.h>
68 #include <mach_kdb.h>
69
70 #include <ddb/db_output.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/machine.h>
74 #include <mach/policy.h>
75 #include <mach/sync_policy.h>
76
77 #include <machine/machine_routines.h>
78 #include <machine/sched_param.h>
79 #include <machine/machine_cpu.h>
80
81 #include <kern/kern_types.h>
82 #include <kern/clock.h>
83 #include <kern/counters.h>
84 #include <kern/cpu_number.h>
85 #include <kern/cpu_data.h>
86 #include <kern/debug.h>
87 #include <kern/lock.h>
88 #include <kern/macro_help.h>
89 #include <kern/machine.h>
90 #include <kern/misc_protos.h>
91 #include <kern/processor.h>
92 #include <kern/queue.h>
93 #include <kern/sched.h>
94 #include <kern/sched_prim.h>
95 #include <kern/syscall_subr.h>
96 #include <kern/task.h>
97 #include <kern/thread.h>
98 #include <kern/wait_queue.h>
99
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <vm/vm_map.h>
103
104 #include <mach/sdt.h>
105
106 #include <sys/kdebug.h>
107
108 #include <kern/pms.h>
109
110 struct run_queue rt_runq;
111 #define RT_RUNQ ((processor_t)-1)
112 decl_simple_lock_data(static,rt_lock);
113
114 #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
115 int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
116
117 #define MAX_UNSAFE_QUANTA 800
118 int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
119
120 #define MAX_POLL_QUANTA 2
121 int max_poll_quanta = MAX_POLL_QUANTA;
122
123 #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
124 int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
125
126 uint64_t max_unsafe_computation;
127 uint32_t sched_safe_duration;
128 uint64_t max_poll_computation;
129
130 uint32_t std_quantum;
131 uint32_t min_std_quantum;
132
133 uint32_t std_quantum_us;
134
135 uint32_t max_rt_quantum;
136 uint32_t min_rt_quantum;
137
138 uint32_t sched_cswtime;
139
140 unsigned sched_tick;
141 uint32_t sched_tick_interval;
142
143 uint32_t sched_pri_shift = INT8_MAX;
144 uint32_t sched_fixed_shift;
145
146 uint32_t sched_run_count, sched_share_count;
147 uint32_t sched_load_average, sched_mach_factor;
148
149 /* Forwards */
150 static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
151 static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode")));
152
153 static thread_t run_queue_dequeue(
154 run_queue_t runq,
155 integer_t options);
156
157 static thread_t choose_thread(
158 processor_t processor,
159 int priority);
160
161 static thread_t thread_select_idle(
162 thread_t thread,
163 processor_t processor);
164
165 static thread_t processor_idle(
166 thread_t thread,
167 processor_t processor);
168
169 static thread_t steal_thread(
170 processor_set_t pset);
171
172 static thread_t steal_processor_thread(
173 processor_t processor);
174
175 static void thread_update_scan(void);
176
177 #if DEBUG
178 extern int debug_task;
179 #define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
180 #else
181 #define TLOG(a, fmt, args...) do {} while (0)
182 #endif
183
184 #if DEBUG
185 static
186 boolean_t thread_runnable(
187 thread_t thread);
188
189 #endif /*DEBUG*/
190
191 /*
192 * State machine
193 *
194 * states are combinations of:
195 * R running
196 * W waiting (or on wait queue)
197 * N non-interruptible
198 * O swapped out
199 * I being swapped in
200 *
201 * init      action
202 *           assert_wait     thread_block    clear_wait      swapout  swapin
203 *
204 * R         RW, RWN         R; setrun       -               -
205 * RN        RWN             RN; setrun      -               -
206 *
207 * RW                        W               R               -
208 * RWN                       WN              RN              -
209 *
210 * W                                         R; setrun       WO
211 * WN                                        RN; setrun      -
212 *
213 * RO                                        -               -        R
214 *
215 */
216
217 int8_t sched_load_shifts[NRQS];
218 int sched_preempt_pri[NRQBM];
219
220 void
221 sched_init(void)
222 {
223 /*
224 * Calculate the timeslicing quantum
225 * in us.
226 */
227 if (default_preemption_rate < 1)
228 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
229 std_quantum_us = (1000 * 1000) / default_preemption_rate;
230
231 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
232
233 sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
234 (1 << SCHED_TICK_SHIFT);
235
236 load_shift_init();
237 preempt_pri_init();
238 simple_lock_init(&rt_lock, 0);
239 run_queue_init(&rt_runq);
240 sched_tick = 0;
241 ast_init();
242 }
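/*
 * Worked example (added annotation, not in the original source): with the
 * default preemption rate of 100 per second,
 *
 *	std_quantum_us = (1000 * 1000) / 100 = 10000 us (10 ms),
 *
 * and, assuming SCHED_TICK_SHIFT == 3 (its value in kern/sched.h of this
 * era) together with MAX_UNSAFE_QUANTA == 800,
 *
 *	sched_safe_duration = (2 * 800 / 100) * (1 << 3) = 128 sched ticks.
 */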
243
244 void
245 sched_timebase_init(void)
246 {
247 uint64_t abstime;
248 uint32_t shift;
249
250 /* standard timeslicing quantum */
251 clock_interval_to_absolutetime_interval(
252 std_quantum_us, NSEC_PER_USEC, &abstime);
253 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
254 std_quantum = (uint32_t)abstime;
255
256 /* smallest remaining quantum (250 us) */
257 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
258 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
259 min_std_quantum = (uint32_t)abstime;
260
261 /* smallest rt computation (50 us) */
262 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
263 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
264 min_rt_quantum = (uint32_t)abstime;
265
266 /* maximum rt computation (50 ms) */
267 clock_interval_to_absolutetime_interval(
268 50, 1000*NSEC_PER_USEC, &abstime);
269 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
270 max_rt_quantum = (uint32_t)abstime;
271
272 /* scheduler tick interval */
273 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
274 NSEC_PER_USEC, &abstime);
275 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
276 sched_tick_interval = (uint32_t)abstime;
277
278 /*
279 * Compute conversion factor from usage to
280 * timesharing priorities with 5/8 ** n aging.
281 */
282 abstime = (abstime * 5) / 3;
283 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
284 abstime >>= 1;
285 sched_fixed_shift = shift;
286
287 max_unsafe_computation = max_unsafe_quanta * std_quantum;
288 max_poll_computation = max_poll_quanta * std_quantum;
289 }
290
291 /*
292 * Set up values for timeshare
293 * loading factors.
294 */
295 static void
296 load_shift_init(void)
297 {
298 int8_t k, *p = sched_load_shifts;
299 uint32_t i, j;
300
301 *p++ = INT8_MIN; *p++ = 0;
302
303 for (i = j = 2, k = 1; i < NRQS; ++k) {
304 for (j <<= 1; i < j; ++i)
305 *p++ = k;
306 }
307 }
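/*
 * Annotation (not in the original source): the loop above fills
 * sched_load_shifts[n] with floor(log2(n)) for n >= 1, e.g.
 *
 *	sched_load_shifts[2..3]  == 1
 *	sched_load_shifts[4..7]  == 2
 *	sched_load_shifts[8..15] == 3
 *
 * with sched_load_shifts[0] == INT8_MIN as a sentinel for a zero load
 * and sched_load_shifts[1] == 0, so a runnable-thread count maps to a
 * priority shift in constant time.
 */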
308
309 static void
310 preempt_pri_init(void)
311 {
312 int i, *p = sched_preempt_pri;
313
314 for (i = BASEPRI_FOREGROUND + 1; i < MINPRI_KERNEL; ++i)
315 setbit(i, p);
316
317 for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
318 setbit(i, p);
319 }
320
321 /*
322 * Thread wait timer expiration.
323 */
324 void
325 thread_timer_expire(
326 void *p0,
327 __unused void *p1)
328 {
329 thread_t thread = p0;
330 spl_t s;
331
332 s = splsched();
333 thread_lock(thread);
334 if (--thread->wait_timer_active == 0) {
335 if (thread->wait_timer_is_set) {
336 thread->wait_timer_is_set = FALSE;
337 clear_wait_internal(thread, THREAD_TIMED_OUT);
338 }
339 }
340 thread_unlock(thread);
341 splx(s);
342 }
343
344 #ifndef __LP64__
345
346 /*
347 * thread_set_timer:
348 *
349 * Set a timer for the current thread, if the thread
350 * is ready to wait. Must be called between assert_wait()
351 * and thread_block().
352 */
353 void
354 thread_set_timer(
355 uint32_t interval,
356 uint32_t scale_factor)
357 {
358 thread_t thread = current_thread();
359 uint64_t deadline;
360 spl_t s;
361
362 s = splsched();
363 thread_lock(thread);
364 if ((thread->state & TH_WAIT) != 0) {
365 clock_interval_to_deadline(interval, scale_factor, &deadline);
366 if (!timer_call_enter(&thread->wait_timer, deadline))
367 thread->wait_timer_active++;
368 thread->wait_timer_is_set = TRUE;
369 }
370 thread_unlock(thread);
371 splx(s);
372 }
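/*
 * Usage sketch (illustrative annotation, not part of the original file):
 * bound a wait to 100 ms.  The object and its event are hypothetical.
 *
 *	assert_wait((event_t)&object->state, THREAD_UNINT);
 *	thread_set_timer(100, NSEC_PER_MSEC);
 *	thread_block(THREAD_CONTINUE_NULL);
 */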
373
374 void
375 thread_set_timer_deadline(
376 uint64_t deadline)
377 {
378 thread_t thread = current_thread();
379 spl_t s;
380
381 s = splsched();
382 thread_lock(thread);
383 if ((thread->state & TH_WAIT) != 0) {
384 if (!timer_call_enter(&thread->wait_timer, deadline))
385 thread->wait_timer_active++;
386 thread->wait_timer_is_set = TRUE;
387 }
388 thread_unlock(thread);
389 splx(s);
390 }
391
392 void
393 thread_cancel_timer(void)
394 {
395 thread_t thread = current_thread();
396 spl_t s;
397
398 s = splsched();
399 thread_lock(thread);
400 if (thread->wait_timer_is_set) {
401 if (timer_call_cancel(&thread->wait_timer))
402 thread->wait_timer_active--;
403 thread->wait_timer_is_set = FALSE;
404 }
405 thread_unlock(thread);
406 splx(s);
407 }
408
409 #endif /* !__LP64__ */
410
411 /*
412 * thread_unblock:
413 *
414 * Unblock thread on wake up.
415 *
416 * Returns TRUE if the thread is still running.
417 *
418 * Thread must be locked.
419 */
420 boolean_t
421 thread_unblock(
422 thread_t thread,
423 wait_result_t wresult)
424 {
425 boolean_t result = FALSE;
426
427 /*
428 * Set wait_result.
429 */
430 thread->wait_result = wresult;
431
432 /*
433 * Cancel pending wait timer.
434 */
435 if (thread->wait_timer_is_set) {
436 if (timer_call_cancel(&thread->wait_timer))
437 thread->wait_timer_active--;
438 thread->wait_timer_is_set = FALSE;
439 }
440
441 /*
442 * Update scheduling state: not waiting,
443 * set running.
444 */
445 thread->state &= ~(TH_WAIT|TH_UNINT);
446
447 if (!(thread->state & TH_RUN)) {
448 thread->state |= TH_RUN;
449
450 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
451
452 /*
453 * Update run counts.
454 */
455 sched_run_incr();
456 if (thread->sched_mode & TH_MODE_TIMESHARE)
457 sched_share_incr();
458 }
459 else {
460 /*
461 * Signal if idling on another processor.
462 */
463 if (thread->state & TH_IDLE) {
464 processor_t processor = thread->last_processor;
465
466 if (processor != current_processor())
467 machine_signal_idle(processor);
468 }
469
470 result = TRUE;
471 }
472
473 /*
474 * Calculate deadline for real-time threads.
475 */
476 if (thread->sched_mode & TH_MODE_REALTIME) {
477 thread->realtime.deadline = mach_absolute_time();
478 thread->realtime.deadline += thread->realtime.constraint;
479 }
480
481 /*
482 * Clear old quantum, fail-safe computation, etc.
483 */
484 thread->current_quantum = 0;
485 thread->computation_metered = 0;
486 thread->reason = AST_NONE;
487
488 KERNEL_DEBUG_CONSTANT(
489 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
490 (uintptr_t)thread_tid(thread), thread->sched_pri, 0, 0, 0);
491
492 DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
493
494 return (result);
495 }
496
497 /*
498 * Routine: thread_go
499 * Purpose:
500 * Unblock and dispatch thread.
501 * Conditions:
502 * thread lock held, IPC locks may be held.
503 * thread must have been pulled from wait queue under same lock hold.
504 * Returns:
505 * KERN_SUCCESS - Thread was set running
506 * KERN_NOT_WAITING - Thread was not waiting
507 */
508 kern_return_t
509 thread_go(
510 thread_t thread,
511 wait_result_t wresult)
512 {
513 assert(thread->at_safe_point == FALSE);
514 assert(thread->wait_event == NO_EVENT64);
515 assert(thread->wait_queue == WAIT_QUEUE_NULL);
516
517 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
518 if (!thread_unblock(thread, wresult))
519 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
520
521 return (KERN_SUCCESS);
522 }
523
524 return (KERN_NOT_WAITING);
525 }
526
527 /*
528 * Routine: thread_mark_wait_locked
529 * Purpose:
530 * Mark a thread as waiting. If, given the circumstances,
531 * it doesn't want to wait (i.e. already aborted), then
532 * indicate that in the return value.
533 * Conditions:
534 * at splsched() and thread is locked.
535 */
536 __private_extern__
537 wait_result_t
538 thread_mark_wait_locked(
539 thread_t thread,
540 wait_interrupt_t interruptible)
541 {
542 boolean_t at_safe_point;
543
544 assert(thread == current_thread());
545
546 /*
547 * The thread may have certain types of interrupts/aborts masked
548 * off. Even if the wait location says these types of interrupts
549 * are OK, we have to honor mask settings (outer-scoped code may
550 * not be able to handle aborts at the moment).
551 */
552 if (interruptible > (thread->options & TH_OPT_INTMASK))
553 interruptible = thread->options & TH_OPT_INTMASK;
554
555 at_safe_point = (interruptible == THREAD_ABORTSAFE);
556
557 if ( interruptible == THREAD_UNINT ||
558 !(thread->sched_mode & TH_MODE_ABORT) ||
559 (!at_safe_point &&
560 (thread->sched_mode & TH_MODE_ABORTSAFELY))) {
561
562 DTRACE_SCHED(sleep);
563
564 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
565 thread->at_safe_point = at_safe_point;
566 return (thread->wait_result = THREAD_WAITING);
567 }
568 else
569 if (thread->sched_mode & TH_MODE_ABORTSAFELY)
570 thread->sched_mode &= ~TH_MODE_ISABORTED;
571
572 return (thread->wait_result = THREAD_INTERRUPTED);
573 }
574
575 /*
576 * Routine: thread_interrupt_level
577 * Purpose:
578 * Set the maximum interruptible state for the
579 * current thread. The effective value of any
580 * interruptible flag passed into assert_wait
581 * will never exceed this.
582 *
583 * Useful for code that must not be interrupted,
584 * but which calls code that doesn't know that.
585 * Returns:
586 * The old interrupt level for the thread.
587 */
588 __private_extern__
589 wait_interrupt_t
590 thread_interrupt_level(
591 wait_interrupt_t new_level)
592 {
593 thread_t thread = current_thread();
594 wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
595
596 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
597
598 return result;
599 }
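/*
 * Usage sketch (illustrative annotation, not part of the original file):
 * bracket a region that must not be aborted, restoring the previous
 * level afterwards.
 *
 *	wait_interrupt_t save = thread_interrupt_level(THREAD_UNINT);
 *	... call code that may block but must not be interrupted ...
 *	thread_interrupt_level(save);
 */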
600
601 /*
602 * Check to see if an assert wait is possible, without actually doing one.
603 * This is used by debug code in locks and elsewhere to verify that it is
604 * always OK to block when trying to take a blocking lock (since waiting
605 * for the actual assert_wait to catch the case may make it hard to detect
606 * this case).
607 */
608 boolean_t
609 assert_wait_possible(void)
610 {
611
612 thread_t thread;
613
614 #if DEBUG
615 if(debug_mode) return TRUE; /* Always succeed in debug mode */
616 #endif
617
618 thread = current_thread();
619
620 return (thread == NULL || wait_queue_assert_possible(thread));
621 }
622
623 /*
624 * assert_wait:
625 *
626 * Assert that the current thread is about to go to
627 * sleep until the specified event occurs.
628 */
629 wait_result_t
630 assert_wait(
631 event_t event,
632 wait_interrupt_t interruptible)
633 {
634 register wait_queue_t wq;
635 register int index;
636
637 assert(event != NO_EVENT);
638
639 index = wait_hash(event);
640 wq = &wait_queues[index];
641 return wait_queue_assert_wait(wq, event, interruptible, 0);
642 }
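/*
 * Usage sketch (illustrative annotation, not part of the original file):
 * the canonical wait/wakeup handshake.  One thread waits on the address
 * of some datum; another changes the datum and posts a wakeup on the
 * same address.  In real use a lock serializes testing the condition
 * against the wakeup; see thread_sleep_usimple_lock() below.
 *
 *	Waiter:
 *		wait_result_t wr = assert_wait((event_t)&flag, THREAD_UNINT);
 *		if (wr == THREAD_WAITING)
 *			wr = thread_block(THREAD_CONTINUE_NULL);
 *
 *	Waker:
 *		flag = TRUE;
 *		thread_wakeup((event_t)&flag);
 */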
643
644 wait_result_t
645 assert_wait_timeout(
646 event_t event,
647 wait_interrupt_t interruptible,
648 uint32_t interval,
649 uint32_t scale_factor)
650 {
651 thread_t thread = current_thread();
652 wait_result_t wresult;
653 wait_queue_t wqueue;
654 uint64_t deadline;
655 spl_t s;
656
657 assert(event != NO_EVENT);
658 wqueue = &wait_queues[wait_hash(event)];
659
660 s = splsched();
661 wait_queue_lock(wqueue);
662 thread_lock(thread);
663
664 clock_interval_to_deadline(interval, scale_factor, &deadline);
665 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
666 interruptible, deadline, thread);
667
668 thread_unlock(thread);
669 wait_queue_unlock(wqueue);
670 splx(s);
671
672 return (wresult);
673 }
674
675 wait_result_t
676 assert_wait_deadline(
677 event_t event,
678 wait_interrupt_t interruptible,
679 uint64_t deadline)
680 {
681 thread_t thread = current_thread();
682 wait_result_t wresult;
683 wait_queue_t wqueue;
684 spl_t s;
685
686 assert(event != NO_EVENT);
687 wqueue = &wait_queues[wait_hash(event)];
688
689 s = splsched();
690 wait_queue_lock(wqueue);
691 thread_lock(thread);
692
693 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
694 interruptible, deadline, thread);
695
696 thread_unlock(thread);
697 wait_queue_unlock(wqueue);
698 splx(s);
699
700 return (wresult);
701 }
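/*
 * Usage sketch (illustrative annotation, not part of the original file):
 * a timed wait distinguishes a wakeup from a timeout by the wait result.
 * Here the interval is 10 with a millisecond scale factor, i.e. 10 ms.
 *
 *	if (assert_wait_timeout((event_t)&flag, THREAD_UNINT,
 *			10, 1000 * NSEC_PER_USEC) == THREAD_WAITING) {
 *		switch (thread_block(THREAD_CONTINUE_NULL)) {
 *		case THREAD_AWAKENED:
 *			... woken by thread_wakeup() ...
 *		case THREAD_TIMED_OUT:
 *			... the 10 ms deadline expired ...
 *		}
 *	}
 */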
702
703 /*
704 * thread_sleep_fast_usimple_lock:
705 *
706 * Cause the current thread to wait until the specified event
707 * occurs. The specified simple_lock is unlocked before releasing
708 * the cpu and re-acquired as part of waking up.
709 *
710 * This is the simple lock sleep interface for components that use a
711 * faster version of simple_lock() than is provided by usimple_lock().
712 */
713 __private_extern__ wait_result_t
714 thread_sleep_fast_usimple_lock(
715 event_t event,
716 simple_lock_t lock,
717 wait_interrupt_t interruptible)
718 {
719 wait_result_t res;
720
721 res = assert_wait(event, interruptible);
722 if (res == THREAD_WAITING) {
723 simple_unlock(lock);
724 res = thread_block(THREAD_CONTINUE_NULL);
725 simple_lock(lock);
726 }
727 return res;
728 }
729
730
731 /*
732 * thread_sleep_usimple_lock:
733 *
734 * Cause the current thread to wait until the specified event
735 * occurs. The specified usimple_lock is unlocked before releasing
736 * the cpu and re-acquired as part of waking up.
737 *
738 * This is the simple lock sleep interface for components where
739 * simple_lock() is defined in terms of usimple_lock().
740 */
741 wait_result_t
742 thread_sleep_usimple_lock(
743 event_t event,
744 usimple_lock_t lock,
745 wait_interrupt_t interruptible)
746 {
747 wait_result_t res;
748
749 res = assert_wait(event, interruptible);
750 if (res == THREAD_WAITING) {
751 usimple_unlock(lock);
752 res = thread_block(THREAD_CONTINUE_NULL);
753 usimple_lock(lock);
754 }
755 return res;
756 }
757
758 /*
759 * thread_sleep_lock_write:
760 *
761 * Cause the current thread to wait until the specified event
762 * occurs. The specified (write) lock is unlocked before releasing
763 * the cpu. The (write) lock will be re-acquired before returning.
764 */
765 wait_result_t
766 thread_sleep_lock_write(
767 event_t event,
768 lock_t *lock,
769 wait_interrupt_t interruptible)
770 {
771 wait_result_t res;
772
773 res = assert_wait(event, interruptible);
774 if (res == THREAD_WAITING) {
775 lock_write_done(lock);
776 res = thread_block(THREAD_CONTINUE_NULL);
777 lock_write(lock);
778 }
779 return res;
780 }
781
782 /*
783 * thread_stop:
784 *
785 * Force a preemption point for a thread and wait
786 * for it to stop running. Arbitrates access among
787 * multiple stop requests. (released by unstop)
788 *
789 * The thread must enter a wait state and stop via a
790 * separate means.
791 *
792 * Returns FALSE if interrupted.
793 */
794 boolean_t
795 thread_stop(
796 thread_t thread)
797 {
798 wait_result_t wresult;
799 spl_t s = splsched();
800
801 wake_lock(thread);
802 thread_lock(thread);
803
804 while (thread->state & TH_SUSP) {
805 thread->wake_active = TRUE;
806 thread_unlock(thread);
807
808 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
809 wake_unlock(thread);
810 splx(s);
811
812 if (wresult == THREAD_WAITING)
813 wresult = thread_block(THREAD_CONTINUE_NULL);
814
815 if (wresult != THREAD_AWAKENED)
816 return (FALSE);
817
818 s = splsched();
819 wake_lock(thread);
820 thread_lock(thread);
821 }
822
823 thread->state |= TH_SUSP;
824
825 while (thread->state & TH_RUN) {
826 processor_t processor = thread->last_processor;
827
828 if (processor != PROCESSOR_NULL && processor->active_thread == thread)
829 cause_ast_check(processor);
830
831 thread->wake_active = TRUE;
832 thread_unlock(thread);
833
834 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
835 wake_unlock(thread);
836 splx(s);
837
838 if (wresult == THREAD_WAITING)
839 wresult = thread_block(THREAD_CONTINUE_NULL);
840
841 if (wresult != THREAD_AWAKENED) {
842 thread_unstop(thread);
843 return (FALSE);
844 }
845
846 s = splsched();
847 wake_lock(thread);
848 thread_lock(thread);
849 }
850
851 thread_unlock(thread);
852 wake_unlock(thread);
853 splx(s);
854
855 return (TRUE);
856 }
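/*
 * Usage sketch (illustrative annotation, not part of the original file):
 * a successful thread_stop() grants exclusive stop rights and must be
 * paired with thread_unstop().  On a FALSE return no stop right is held
 * (thread_stop() releases it internally before failing).
 *
 *	if (thread_stop(thread)) {
 *		... thread is off any processor; examine or adjust it ...
 *		thread_unstop(thread);
 *	}
 */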
857
858 /*
859 * thread_unstop:
860 *
861 * Release a previous stop request and set
862 * the thread running if appropriate.
863 *
864 * Use only after a successful stop operation.
865 */
866 void
867 thread_unstop(
868 thread_t thread)
869 {
870 spl_t s = splsched();
871
872 wake_lock(thread);
873 thread_lock(thread);
874
875 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
876 thread->state &= ~TH_SUSP;
877 thread_unblock(thread, THREAD_AWAKENED);
878
879 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
880 }
881 else
882 if (thread->state & TH_SUSP) {
883 thread->state &= ~TH_SUSP;
884
885 if (thread->wake_active) {
886 thread->wake_active = FALSE;
887 thread_unlock(thread);
888
889 thread_wakeup(&thread->wake_active);
890 wake_unlock(thread);
891 splx(s);
892
893 return;
894 }
895 }
896
897 thread_unlock(thread);
898 wake_unlock(thread);
899 splx(s);
900 }
901
902 /*
903 * thread_wait:
904 *
905 * Wait for a thread to stop running. (non-interruptible)
906 *
907 */
908 void
909 thread_wait(
910 thread_t thread)
911 {
912 wait_result_t wresult;
913 spl_t s = splsched();
914
915 wake_lock(thread);
916 thread_lock(thread);
917
918 while (thread->state & TH_RUN) {
919 processor_t processor = thread->last_processor;
920
921 if (processor != PROCESSOR_NULL && processor->active_thread == thread)
922 cause_ast_check(processor);
923
924 thread->wake_active = TRUE;
925 thread_unlock(thread);
926
927 wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
928 wake_unlock(thread);
929 splx(s);
930
931 if (wresult == THREAD_WAITING)
932 thread_block(THREAD_CONTINUE_NULL);
933
934 s = splsched();
935 wake_lock(thread);
936 thread_lock(thread);
937 }
938
939 thread_unlock(thread);
940 wake_unlock(thread);
941 splx(s);
942 }
943
944 /*
945 * Routine: clear_wait_internal
946 *
947 * Clear the wait condition for the specified thread.
948 * Start the thread executing if that is appropriate.
949 * Arguments:
950 * thread thread to awaken
951 * result Wakeup result the thread should see
952 * Conditions:
953 * At splsched
954 * the thread is locked.
955 * Returns:
956 * KERN_SUCCESS thread was rousted out of a wait
957 * KERN_FAILURE thread was waiting but could not be rousted
958 * KERN_NOT_WAITING thread was not waiting
959 */
960 __private_extern__ kern_return_t
961 clear_wait_internal(
962 thread_t thread,
963 wait_result_t wresult)
964 {
965 wait_queue_t wq = thread->wait_queue;
966 int i = LockTimeOut;
967
968 do {
969 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
970 return (KERN_FAILURE);
971
972 if (wq != WAIT_QUEUE_NULL) {
973 if (wait_queue_lock_try(wq)) {
974 wait_queue_pull_thread_locked(wq, thread, TRUE);
975 /* wait queue unlocked, thread still locked */
976 }
977 else {
978 thread_unlock(thread);
979 delay(1);
980
981 thread_lock(thread);
982 if (wq != thread->wait_queue)
983 return (KERN_NOT_WAITING);
984
985 continue;
986 }
987 }
988
989 return (thread_go(thread, wresult));
990 } while (--i > 0);
991
992 panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
993 thread, wq, cpu_number());
994
995 return (KERN_FAILURE);
996 }
997
998
999 /*
1000 * clear_wait:
1001 *
1002 * Clear the wait condition for the specified thread. Start the thread
1003 * executing if that is appropriate.
1004 *
1005 * parameters:
1006 * thread thread to awaken
1007 * result Wakeup result the thread should see
1008 */
1009 kern_return_t
1010 clear_wait(
1011 thread_t thread,
1012 wait_result_t result)
1013 {
1014 kern_return_t ret;
1015 spl_t s;
1016
1017 s = splsched();
1018 thread_lock(thread);
1019 ret = clear_wait_internal(thread, result);
1020 thread_unlock(thread);
1021 splx(s);
1022 return ret;
1023 }
1024
1025
1026 /*
1027 * thread_wakeup_prim:
1028 *
1029 * Common routine for thread_wakeup, thread_wakeup_with_result,
1030 * and thread_wakeup_one.
1031 *
1032 */
1033 kern_return_t
1034 thread_wakeup_prim(
1035 event_t event,
1036 boolean_t one_thread,
1037 wait_result_t result)
1038 {
1039 register wait_queue_t wq;
1040 register int index;
1041
1042 index = wait_hash(event);
1043 wq = &wait_queues[index];
1044 if (one_thread)
1045 return (wait_queue_wakeup_one(wq, event, result));
1046 else
1047 return (wait_queue_wakeup_all(wq, event, result));
1048 }
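/*
 * Annotation (not in the original source): callers normally reach this
 * routine through the thread_wakeup(), thread_wakeup_with_result() and
 * thread_wakeup_one() macros declared in kern/sched_prim.h, e.g.
 *
 *	thread_wakeup((event_t)&object->state);
 *
 * expands (approximately) to
 *
 *	thread_wakeup_prim((event_t)&object->state, FALSE, THREAD_AWAKENED);
 */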
1049
1050 /*
1051 * thread_bind:
1052 *
1053 * Force the current thread to execute on the specified processor.
1054 *
1055 * Returns the previous binding. PROCESSOR_NULL means
1056 * not bound.
1057 *
1058 * XXX - DO NOT export this to users - XXX
1059 */
1060 processor_t
1061 thread_bind(
1062 processor_t processor)
1063 {
1064 thread_t self = current_thread();
1065 processor_t prev;
1066 spl_t s;
1067
1068 s = splsched();
1069 thread_lock(self);
1070
1071 prev = self->bound_processor;
1072 self->bound_processor = processor;
1073
1074 thread_unlock(self);
1075 splx(s);
1076
1077 return (prev);
1078 }
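/*
 * Usage sketch (illustrative annotation, not part of the original file):
 * binding only takes effect at the next dispatch, so callers typically
 * block immediately after setting it.
 *
 *	processor_t prev = thread_bind(processor);
 *	thread_block(THREAD_CONTINUE_NULL);
 *	... now executing on 'processor' ...
 *	thread_bind(prev);
 */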
1079
1080 /*
1081 * thread_select:
1082 *
1083 * Select a new thread for the current processor to execute.
1084 *
1085 * May select the current thread, which must be locked.
1086 */
1087 static thread_t
1088 thread_select(
1089 thread_t thread,
1090 processor_t processor)
1091 {
1092 processor_set_t pset = processor->processor_set;
1093 thread_t new_thread = THREAD_NULL;
1094 boolean_t inactive_state;
1095
1096 do {
1097 /*
1098 * Update the priority.
1099 */
1100 if (thread->sched_stamp != sched_tick)
1101 update_priority(thread);
1102
1103 processor->current_pri = thread->sched_pri;
1104
1105 pset_lock(pset);
1106
1107 inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_cpu_is_inactive(processor->cpu_id);
1108
1109 simple_lock(&rt_lock);
1110
1111 /*
1112 * Test to see if the current thread should continue
1113 * to run on this processor. Must be runnable, and not
1114 * bound to a different processor, nor be in the wrong
1115 * processor set.
1116 */
1117 if ( thread->state == TH_RUN &&
1118 (thread->sched_pri >= BASEPRI_RTQUEUES ||
1119 processor->processor_meta == PROCESSOR_META_NULL ||
1120 processor->processor_meta->primary == processor) &&
1121 (thread->bound_processor == PROCESSOR_NULL ||
1122 thread->bound_processor == processor) &&
1123 (thread->affinity_set == AFFINITY_SET_NULL ||
1124 thread->affinity_set->aset_pset == pset) ) {
1125 if ( thread->sched_pri >= BASEPRI_RTQUEUES &&
1126 first_timeslice(processor) ) {
1127 if (rt_runq.highq >= BASEPRI_RTQUEUES) {
1128 register run_queue_t runq = &rt_runq;
1129 register queue_t q;
1130
1131 q = runq->queues + runq->highq;
1132 if (((thread_t)q->next)->realtime.deadline <
1133 processor->deadline) {
1134 thread = (thread_t)q->next;
1135 ((queue_entry_t)thread)->next->prev = q;
1136 q->next = ((queue_entry_t)thread)->next;
1137 thread->runq = PROCESSOR_NULL;
1138 runq->count--; runq->urgency--;
1139 assert(runq->urgency >= 0);
1140 if (queue_empty(q)) {
1141 if (runq->highq != IDLEPRI)
1142 clrbit(MAXPRI - runq->highq, runq->bitmap);
1143 runq->highq = MAXPRI - ffsbit(runq->bitmap);
1144 }
1145 }
1146 }
1147
1148 simple_unlock(&rt_lock);
1149
1150 processor->deadline = thread->realtime.deadline;
1151
1152 pset_unlock(pset);
1153
1154 return (thread);
1155 }
1156
1157 if (!inactive_state && rt_runq.highq < thread->sched_pri &&
1158 (new_thread = choose_thread(processor, thread->sched_pri)) == THREAD_NULL) {
1159
1160 simple_unlock(&rt_lock);
1161
1162 /* I am the highest priority runnable (non-idle) thread */
1163
1164 pset_pri_hint(pset, processor, processor->current_pri);
1165
1166 pset_count_hint(pset, processor, processor->runq.count);
1167
1168 processor->deadline = UINT64_MAX;
1169
1170 pset_unlock(pset);
1171
1172 return (thread);
1173 }
1174 }
1175
1176 if (new_thread != THREAD_NULL ||
1177 (processor->runq.highq >= rt_runq.highq &&
1178 (new_thread = choose_thread(processor, MINPRI)) != THREAD_NULL)) {
1179 simple_unlock(&rt_lock);
1180
1181 if (!inactive_state) {
1182 pset_pri_hint(pset, processor, new_thread->sched_pri);
1183
1184 pset_count_hint(pset, processor, processor->runq.count);
1185 }
1186
1187 processor->deadline = UINT64_MAX;
1188 pset_unlock(pset);
1189
1190 return (new_thread);
1191 }
1192
1193 if (rt_runq.count > 0) {
1194 thread = run_queue_dequeue(&rt_runq, SCHED_HEADQ);
1195 simple_unlock(&rt_lock);
1196
1197 processor->deadline = thread->realtime.deadline;
1198 pset_unlock(pset);
1199
1200 return (thread);
1201 }
1202
1203 simple_unlock(&rt_lock);
1204
1205 processor->deadline = UINT64_MAX;
1206
1207 /*
1208 * Set processor inactive based on
1209 * indication from the platform code.
1210 */
1211 if (inactive_state) {
1212 if (processor->state == PROCESSOR_RUNNING)
1213 remqueue(&pset->active_queue, (queue_entry_t)processor);
1214 else
1215 if (processor->state == PROCESSOR_IDLE)
1216 remqueue(&pset->idle_queue, (queue_entry_t)processor);
1217
1218 processor->state = PROCESSOR_INACTIVE;
1219
1220 pset_unlock(pset);
1221
1222 return (processor->idle_thread);
1223 }
1224
1225 /*
1226 * No runnable threads, attempt to steal
1227 * from other processors.
1228 */
1229 new_thread = steal_thread(pset);
1230 if (new_thread != THREAD_NULL)
1231 return (new_thread);
1232
1233 /*
1234 * If other threads have appeared, shortcut
1235 * around again.
1236 */
1237 if (processor->runq.count > 0 || rt_runq.count > 0)
1238 continue;
1239
1240 pset_lock(pset);
1241
1242 /*
1243 * Nothing is runnable, so set this processor idle if it
1244 * was running.
1245 */
1246 if (processor->state == PROCESSOR_RUNNING) {
1247 remqueue(&pset->active_queue, (queue_entry_t)processor);
1248 processor->state = PROCESSOR_IDLE;
1249
1250 if (processor->processor_meta == PROCESSOR_META_NULL || processor->processor_meta->primary == processor) {
1251 enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
1252 pset->low_pri = pset->low_count = processor;
1253 }
1254 else {
1255 enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor);
1256
1257 if (thread->sched_pri < BASEPRI_RTQUEUES) {
1258 pset_unlock(pset);
1259
1260 return (processor->idle_thread);
1261 }
1262 }
1263 }
1264
1265 pset_unlock(pset);
1266
1267 /*
1268 * Choose idle thread if fast idle is not possible.
1269 */
1270 if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active)
1271 return (processor->idle_thread);
1272
1273 /*
1274 * Perform idling activities directly without a
1275 * context switch. Return dispatched thread,
1276 * else check again for a runnable thread.
1277 */
1278 new_thread = thread_select_idle(thread, processor);
1279
1280 } while (new_thread == THREAD_NULL);
1281
1282 return (new_thread);
1283 }
1284
1285 /*
1286 * thread_select_idle:
1287 *
1288 * Idle the processor using the current thread context.
1289 *
1290 * Called with thread locked, then dropped and relocked.
1291 */
1292 static thread_t
1293 thread_select_idle(
1294 thread_t thread,
1295 processor_t processor)
1296 {
1297 thread_t new_thread;
1298
1299 if (thread->sched_mode & TH_MODE_TIMESHARE)
1300 sched_share_decr();
1301 sched_run_decr();
1302
1303 thread->state |= TH_IDLE;
1304 processor->current_pri = IDLEPRI;
1305
1306 thread_unlock(thread);
1307
1308 /*
1309 * Switch execution timing to processor idle thread.
1310 */
1311 processor->last_dispatch = mach_absolute_time();
1312 thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
1313 PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;
1314
1315 /*
1316 * Cancel the quantum timer while idling.
1317 */
1318 timer_call_cancel(&processor->quantum_timer);
1319 processor->timeslice = 0;
1320
1321 (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
1322
1323 /*
1324 * Enable interrupts and perform idling activities. No
1325 * preemption due to TH_IDLE being set.
1326 */
1327 spllo(); new_thread = processor_idle(thread, processor);
1328
1329 /*
1330 * Return at splsched.
1331 */
1332 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
1333
1334 thread_lock(thread);
1335
1336 /*
1337 * If awakened, switch to thread timer and start a new quantum.
1338 * Otherwise skip; we will context switch to another thread or return here.
1339 */
1340 if (!(thread->state & TH_WAIT)) {
1341 processor->last_dispatch = mach_absolute_time();
1342 thread_timer_event(processor->last_dispatch, &thread->system_timer);
1343 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
1344
1345 thread_quantum_init(thread);
1346
1347 processor->quantum_end = processor->last_dispatch + thread->current_quantum;
1348 timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end);
1349 processor->timeslice = 1;
1350
1351 thread->computation_epoch = processor->last_dispatch;
1352 }
1353
1354 thread->state &= ~TH_IDLE;
1355
1356 sched_run_incr();
1357 if (thread->sched_mode & TH_MODE_TIMESHARE)
1358 sched_share_incr();
1359
1360 return (new_thread);
1361 }
1362
1363 /*
1364 * choose_thread:
1365 *
1366 * Locate a thread to execute from the processor run queue
1367 * and return it. Only choose a thread with greater or equal
1368 * priority.
1369 *
1370 * Associated pset must be locked. Returns THREAD_NULL
1371 * on failure.
1372 */
1373 static thread_t
1374 choose_thread(
1375 processor_t processor,
1376 int priority)
1377 {
1378 run_queue_t rq = &processor->runq;
1379 queue_t queue = rq->queues + rq->highq;
1380 int pri = rq->highq, count = rq->count;
1381 thread_t thread;
1382
1383 while (count > 0 && pri >= priority) {
1384 thread = (thread_t)queue_first(queue);
1385 while (!queue_end(queue, (queue_entry_t)thread)) {
1386 if (thread->bound_processor == PROCESSOR_NULL ||
1387 thread->bound_processor == processor) {
1388 remqueue(queue, (queue_entry_t)thread);
1389
1390 thread->runq = PROCESSOR_NULL;
1391 rq->count--;
1392 if (testbit(pri, sched_preempt_pri)) {
1393 rq->urgency--; assert(rq->urgency >= 0);
1394 }
1395 if (queue_empty(queue)) {
1396 if (pri != IDLEPRI)
1397 clrbit(MAXPRI - pri, rq->bitmap);
1398 rq->highq = MAXPRI - ffsbit(rq->bitmap);
1399 }
1400
1401 return (thread);
1402 }
1403 count--;
1404
1405 thread = (thread_t)queue_next((queue_entry_t)thread);
1406 }
1407
1408 queue--; pri--;
1409 }
1410
1411 return (THREAD_NULL);
1412 }
1413
1414 /*
1415 * Perform a context switch and start executing the new thread.
1416 *
1417 * Returns FALSE on failure, and the thread is re-dispatched.
1418 *
1419 * Called at splsched.
1420 */
1421
1422 #define funnel_release_check(thread, debug) \
1423 MACRO_BEGIN \
1424 if ((thread)->funnel_state & TH_FN_OWNED) { \
1425 (thread)->funnel_state = TH_FN_REFUNNEL; \
1426 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
1427 (thread)->funnel_lock, (debug), 0, 0, 0); \
1428 funnel_unlock((thread)->funnel_lock); \
1429 } \
1430 MACRO_END
1431
1432 #define funnel_refunnel_check(thread, debug) \
1433 MACRO_BEGIN \
1434 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
1435 kern_return_t result = (thread)->wait_result; \
1436 \
1437 (thread)->funnel_state = 0; \
1438 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
1439 (thread)->funnel_lock, (debug), 0, 0, 0); \
1440 funnel_lock((thread)->funnel_lock); \
1441 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
1442 (thread)->funnel_lock, (debug), 0, 0, 0); \
1443 (thread)->funnel_state = TH_FN_OWNED; \
1444 (thread)->wait_result = result; \
1445 } \
1446 MACRO_END
1447
1448 static boolean_t
1449 thread_invoke(
1450 register thread_t self,
1451 register thread_t thread,
1452 ast_t reason)
1453 {
1454 thread_continue_t continuation = self->continuation;
1455 void *parameter = self->parameter;
1456 processor_t processor;
1457
1458 if (get_preemption_level() != 0) {
1459 int pl = get_preemption_level();
1460 panic("thread_invoke: preemption_level %d, possible cause: %s",
1461 pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" :
1462 "blocking while holding a spinlock, or within interrupt context"));
1463 }
1464
1465 assert(self == current_thread());
1466
1467 /*
1468 * Mark thread interruptible.
1469 */
1470 thread_lock(thread);
1471 thread->state &= ~TH_UNINT;
1472
1473 #if DEBUG
1474 assert(thread_runnable(thread));
1475 #endif
1476
1477 /*
1478 * Allow time constraint threads to hang onto
1479 * a stack.
1480 */
1481 if ((self->sched_mode & TH_MODE_REALTIME) && !self->reserved_stack)
1482 self->reserved_stack = self->kernel_stack;
1483
1484 if (continuation != NULL) {
1485 if (!thread->kernel_stack) {
1486 /*
1487 * If we are using a privileged stack,
1488 * check to see whether we can exchange it with
1489 * that of the other thread.
1490 */
1491 if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack)
1492 goto need_stack;
1493
1494 /*
1495 * Context switch by performing a stack handoff.
1496 */
1497 continuation = thread->continuation;
1498 parameter = thread->parameter;
1499
1500 processor = current_processor();
1501 processor->active_thread = thread;
1502 processor->current_pri = thread->sched_pri;
1503 if (thread->last_processor != processor && thread->last_processor != NULL) {
1504 if (thread->last_processor->processor_set != processor->processor_set)
1505 thread->ps_switch++;
1506 thread->p_switch++;
1507 }
1508 thread->last_processor = processor;
1509 thread->c_switch++;
1510 ast_context(thread);
1511 thread_unlock(thread);
1512
1513 self->reason = reason;
1514
1515 processor->last_dispatch = mach_absolute_time();
1516 thread_timer_event(processor->last_dispatch, &thread->system_timer);
1517 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
1518
1519 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
1520 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
1521
1522 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
1523
1524 TLOG(1, "thread_invoke: calling machine_stack_handoff\n");
1525 machine_stack_handoff(self, thread);
1526
1527 DTRACE_SCHED(on__cpu);
1528
1529 thread_dispatch(self, thread);
1530
1531 thread->continuation = thread->parameter = NULL;
1532
1533 counter(c_thread_invoke_hits++);
1534
1535 funnel_refunnel_check(thread, 2);
1536 (void) spllo();
1537
1538 assert(continuation);
1539 call_continuation(continuation, parameter, thread->wait_result);
1540 /*NOTREACHED*/
1541 }
1542 else if (thread == self) {
1543 /* same thread but with continuation */
1544 ast_context(self);
1545 counter(++c_thread_invoke_same);
1546 thread_unlock(self);
1547
1548 self->continuation = self->parameter = NULL;
1549
1550 funnel_refunnel_check(self, 3);
1551 (void) spllo();
1552
1553 call_continuation(continuation, parameter, self->wait_result);
1554 /*NOTREACHED*/
1555 }
1556 }
1557 else {
1558 /*
1559 * Check that the other thread has a stack
1560 */
1561 if (!thread->kernel_stack) {
1562 need_stack:
1563 if (!stack_alloc_try(thread)) {
1564 counter(c_thread_invoke_misses++);
1565 thread_unlock(thread);
1566 thread_stack_enqueue(thread);
1567 return (FALSE);
1568 }
1569 }
1570 else if (thread == self) {
1571 ast_context(self);
1572 counter(++c_thread_invoke_same);
1573 thread_unlock(self);
1574 return (TRUE);
1575 }
1576 }
1577
1578 /*
1579 * Context switch by full context save.
1580 */
1581 processor = current_processor();
1582 processor->active_thread = thread;
1583 processor->current_pri = thread->sched_pri;
1584 if (thread->last_processor != processor && thread->last_processor != NULL) {
1585 if (thread->last_processor->processor_set != processor->processor_set)
1586 thread->ps_switch++;
1587 thread->p_switch++;
1588 }
1589 thread->last_processor = processor;
1590 thread->c_switch++;
1591 ast_context(thread);
1592 thread_unlock(thread);
1593
1594 counter(c_thread_invoke_csw++);
1595
1596 assert(self->runq == PROCESSOR_NULL);
1597 self->reason = reason;
1598
1599 processor->last_dispatch = mach_absolute_time();
1600 thread_timer_event(processor->last_dispatch, &thread->system_timer);
1601 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
1602
1603 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
1604 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
1605
1606 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
1607
1608 /*
1609 * This is where we actually switch register context,
1610 * and address space if required. We will next run
1611 * as a result of a subsequent context switch.
1612 */
1613 thread = machine_switch_context(self, continuation, thread);
1614 TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread);
1615
1616 DTRACE_SCHED(on__cpu);
1617
1618 /*
1619 * We have been resumed and are set to run.
1620 */
1621 thread_dispatch(thread, self);
1622
1623 if (continuation) {
1624 self->continuation = self->parameter = NULL;
1625
1626 funnel_refunnel_check(self, 3);
1627 (void) spllo();
1628
1629 call_continuation(continuation, parameter, self->wait_result);
1630 /*NOTREACHED*/
1631 }
1632
1633 return (TRUE);
1634 }
1635
1636 /*
1637 * thread_dispatch:
1638 *
1639 * Handle threads at context switch. Re-dispatch other thread
1640 * if still running, otherwise update run state and perform
1641 * special actions. Update quantum for other thread and begin
1642 * the quantum for ourselves.
1643 *
1644 * Called at splsched.
1645 */
1646 void
1647 thread_dispatch(
1648 thread_t thread,
1649 thread_t self)
1650 {
1651 processor_t processor = self->last_processor;
1652
1653 if (thread != THREAD_NULL) {
1654 /*
1655 * If blocked at a continuation, discard
1656 * the stack.
1657 */
1658 if (thread->continuation != NULL && thread->kernel_stack != 0)
1659 stack_free(thread);
1660
1661 if (!(thread->state & TH_IDLE)) {
1662 wake_lock(thread);
1663 thread_lock(thread);
1664
1665 /*
1666 * Compute remainder of current quantum.
1667 */
1668 if ( first_timeslice(processor) &&
1669 processor->quantum_end > processor->last_dispatch )
1670 thread->current_quantum = (uint32_t)(processor->quantum_end - processor->last_dispatch);
1671 else
1672 thread->current_quantum = 0;
1673
1674 if (thread->sched_mode & TH_MODE_REALTIME) {
1675 /*
1676 * Cancel the deadline if the thread has
1677 * consumed the entire quantum.
1678 */
1679 if (thread->current_quantum == 0) {
1680 thread->realtime.deadline = UINT64_MAX;
1681 thread->reason |= AST_QUANTUM;
1682 }
1683 }
1684 else {
1685 /*
1686 * For non-realtime threads treat a tiny
1687 * remaining quantum as an expired quantum
1688 * but include what's left next time.
1689 */
1690 if (thread->current_quantum < min_std_quantum) {
1691 thread->reason |= AST_QUANTUM;
1692 thread->current_quantum += std_quantum;
1693 }
1694 }
1695
1696 /*
1697 * If we are doing a direct handoff then
1698 * take the remainder of the quantum.
1699 */
1700 if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
1701 self->current_quantum = thread->current_quantum;
1702 thread->reason |= AST_QUANTUM;
1703 thread->current_quantum = 0;
1704 }
1705
1706 thread->computation_metered += (processor->last_dispatch - thread->computation_epoch);
1707
1708 if (!(thread->state & TH_WAIT)) {
1709 /*
1710 * Still running.
1711 */
1712 if (thread->reason & AST_QUANTUM)
1713 thread_setrun(thread, SCHED_TAILQ);
1714 else
1715 if (thread->reason & AST_PREEMPT)
1716 thread_setrun(thread, SCHED_HEADQ);
1717 else
1718 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1719
1720 thread->reason = AST_NONE;
1721
1722 thread_unlock(thread);
1723 wake_unlock(thread);
1724 }
1725 else {
1726 /*
1727 * Waiting.
1728 */
1729 thread->state &= ~TH_RUN;
1730
1731 if (thread->sched_mode & TH_MODE_TIMESHARE)
1732 sched_share_decr();
1733 sched_run_decr();
1734
1735 if (thread->wake_active) {
1736 thread->wake_active = FALSE;
1737 thread_unlock(thread);
1738
1739 thread_wakeup(&thread->wake_active);
1740 }
1741 else
1742 thread_unlock(thread);
1743
1744 wake_unlock(thread);
1745
1746 (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
1747
1748 if (thread->state & TH_TERMINATE)
1749 thread_terminate_enqueue(thread);
1750 }
1751 }
1752 }
1753
1754 if (!(self->state & TH_IDLE)) {
1755 /*
1756 * Get a new quantum if none remaining.
1757 */
1758 if (self->current_quantum == 0)
1759 thread_quantum_init(self);
1760
1761 /*
1762 * Set up quantum timer and timeslice.
1763 */
1764 processor->quantum_end = (processor->last_dispatch + self->current_quantum);
1765 timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end);
1766
1767 processor->timeslice = 1;
1768
1769 self->computation_epoch = processor->last_dispatch;
1770 }
1771 else {
1772 timer_call_cancel(&processor->quantum_timer);
1773 processor->timeslice = 0;
1774 }
1775 }
1776
1777 #include <libkern/OSDebug.h>
1778
1779 uint32_t kdebug_thread_block = 0;
1780
1781
1782 /*
1783 * thread_block_reason:
1784 *
1785 * Forces a reschedule, blocking the caller if a wait
1786 * has been asserted.
1787 *
1788 * If a continuation is specified, then thread_invoke will
1789 * attempt to discard the thread's kernel stack. When the
1790 * thread resumes, it will execute the continuation function
1791 * on a new kernel stack.
1792 */
1793 counter(mach_counter_t c_thread_block_calls = 0;)
1794
1795 wait_result_t
1796 thread_block_reason(
1797 thread_continue_t continuation,
1798 void *parameter,
1799 ast_t reason)
1800 {
1801 register thread_t self = current_thread();
1802 register processor_t processor;
1803 register thread_t new_thread;
1804 spl_t s;
1805
1806 counter(++c_thread_block_calls);
1807
1808 s = splsched();
1809
1810 if (!(reason & AST_PREEMPT))
1811 funnel_release_check(self, 2);
1812
1813 processor = current_processor();
1814
1815 /* If we're explicitly yielding, force a subsequent quantum */
1816 if (reason & AST_YIELD)
1817 processor->timeslice = 0;
1818
1819 /* We're handling all scheduling AST's */
1820 ast_off(AST_SCHEDULING);
1821
1822 self->continuation = continuation;
1823 self->parameter = parameter;
1824
1825 if (kdebug_thread_block && kdebug_enable && self->state != TH_RUN) {
1826 uint32_t bt[8];
1827
1828 OSBacktrace((void **)&bt[0], 8);
1829
1830 KERNEL_DEBUG_CONSTANT(0x140004c | DBG_FUNC_START, bt[0], bt[1], bt[2], bt[3], 0);
1831 KERNEL_DEBUG_CONSTANT(0x140004c | DBG_FUNC_END, bt[4], bt[5], bt[6], bt[7], 0);
1832 }
1833
1834 do {
1835 thread_lock(self);
1836 new_thread = thread_select(self, processor);
1837 thread_unlock(self);
1838 } while (!thread_invoke(self, new_thread, reason));
1839
1840 funnel_refunnel_check(self, 5);
1841 splx(s);
1842
1843 return (self->wait_result);
1844 }
1845
1846 /*
1847 * thread_block:
1848 *
1849 * Block the current thread if a wait has been asserted.
1850 */
1851 wait_result_t
1852 thread_block(
1853 thread_continue_t continuation)
1854 {
1855 return thread_block_reason(continuation, NULL, AST_NONE);
1856 }
1857
1858 wait_result_t
1859 thread_block_parameter(
1860 thread_continue_t continuation,
1861 void *parameter)
1862 {
1863 return thread_block_reason(continuation, parameter, AST_NONE);
1864 }
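/*
 * Usage sketch (illustrative annotation, not part of the original file):
 * blocking with a continuation lets the kernel stack be discarded while
 * waiting.  The continuation runs on a fresh stack when the thread
 * resumes; the block call does not return to its caller in that case.
 * 'my_continuation' is a hypothetical name.
 *
 *	static void my_continuation(void *param, wait_result_t wr);
 *
 *	assert_wait((event_t)&flag, THREAD_UNINT);
 *	thread_block_parameter(my_continuation, object);
 *	(not reached)
 */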
1865
1866 /*
1867 * thread_run:
1868 *
1869 * Switch directly from the current thread to the
1870 * new thread, handing off our quantum if appropriate.
1871 *
1872 * New thread must be runnable, and not on a run queue.
1873 *
1874 * Called at splsched.
1875 */
1876 int
1877 thread_run(
1878 thread_t self,
1879 thread_continue_t continuation,
1880 void *parameter,
1881 thread_t new_thread)
1882 {
1883 ast_t handoff = AST_HANDOFF;
1884
1885 funnel_release_check(self, 3);
1886
1887 self->continuation = continuation;
1888 self->parameter = parameter;
1889
1890 while (!thread_invoke(self, new_thread, handoff)) {
1891 processor_t processor = current_processor();
1892
1893 thread_lock(self);
1894 new_thread = thread_select(self, processor);
1895 thread_unlock(self);
1896 handoff = AST_NONE;
1897 }
1898
1899 funnel_refunnel_check(self, 6);
1900
1901 return (self->wait_result);
1902 }
1903
1904 /*
1905 * thread_continue:
1906 *
1907 * Called at splsched when a thread first receives
1908 * a new stack after a continuation.
1909 */
1910 void
1911 thread_continue(
1912 register thread_t thread)
1913 {
1914 register thread_t self = current_thread();
1915 register thread_continue_t continuation;
1916 register void *parameter;
1917
1918 DTRACE_SCHED(on__cpu);
1919
1920 continuation = self->continuation;
1921 parameter = self->parameter;
1922
1923 thread_dispatch(thread, self);
1924
1925 self->continuation = self->parameter = NULL;
1926
1927 funnel_refunnel_check(self, 4);
1928
1929 if (thread != THREAD_NULL)
1930 (void)spllo();
1931
1932 TLOG(1, "thread_continue: calling call_continuation \n");
1933 call_continuation(continuation, parameter, self->wait_result);
1934 /*NOTREACHED*/
1935 }
1936
1937 /*
1938 * run_queue_init:
1939 *
1940 * Initialize a run queue before first use.
1941 */
1942 void
1943 run_queue_init(
1944 run_queue_t rq)
1945 {
1946 int i;
1947
1948 rq->highq = IDLEPRI;
1949 for (i = 0; i < NRQBM; i++)
1950 rq->bitmap[i] = 0;
1951 setbit(MAXPRI - IDLEPRI, rq->bitmap);
1952 rq->urgency = rq->count = 0;
1953 for (i = 0; i < NRQS; i++)
1954 queue_init(&rq->queues[i]);
1955 }
1956
1957 /*
1958 * run_queue_dequeue:
1959 *
1960 * Perform a dequeue operation on a run queue,
1961 * and return the resulting thread.
1962 *
1963 * The run queue must be locked (see run_queue_remove()
1964 * for more info), and not empty.
1965 */
1966 static thread_t
1967 run_queue_dequeue(
1968 run_queue_t rq,
1969 integer_t options)
1970 {
1971 thread_t thread;
1972 queue_t queue = rq->queues + rq->highq;
1973
1974 if (options & SCHED_HEADQ) {
1975 thread = (thread_t)queue->next;
1976 ((queue_entry_t)thread)->next->prev = queue;
1977 queue->next = ((queue_entry_t)thread)->next;
1978 }
1979 else {
1980 thread = (thread_t)queue->prev;
1981 ((queue_entry_t)thread)->prev->next = queue;
1982 queue->prev = ((queue_entry_t)thread)->prev;
1983 }
1984
1985 thread->runq = PROCESSOR_NULL;
1986 rq->count--;
1987 if (testbit(rq->highq, sched_preempt_pri)) {
1988 rq->urgency--; assert(rq->urgency >= 0);
1989 }
1990 if (queue_empty(queue)) {
1991 if (rq->highq != IDLEPRI)
1992 clrbit(MAXPRI - rq->highq, rq->bitmap);
1993 rq->highq = MAXPRI - ffsbit(rq->bitmap);
1994 }
1995
1996 return (thread);
1997 }
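/*
 * Annotation (not in the original source): the bitmap invariant used
 * above and in choose_thread().  Bit (MAXPRI - pri) is set exactly when
 * queues[pri] is non-empty, so after emptying the head queue
 *
 *	rq->highq = MAXPRI - ffsbit(rq->bitmap);
 *
 * recomputes the highest non-empty priority with one find-first-set.
 */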
1998
1999 /*
2000 * realtime_queue_insert:
2001 *
2002 * Enqueue a thread for realtime execution.
2003 */
2004 static boolean_t
2005 realtime_queue_insert(
2006 thread_t thread)
2007 {
2008 run_queue_t rq = &rt_runq;
2009 queue_t queue = rq->queues + thread->sched_pri;
2010 uint64_t deadline = thread->realtime.deadline;
2011 boolean_t preempt = FALSE;
2012
2013 simple_lock(&rt_lock);
2014
2015 if (queue_empty(queue)) {
2016 enqueue_tail(queue, (queue_entry_t)thread);
2017
2018 setbit(MAXPRI - thread->sched_pri, rq->bitmap);
2019 if (thread->sched_pri > rq->highq)
2020 rq->highq = thread->sched_pri;
2021 preempt = TRUE;
2022 }
2023 else {
2024 register thread_t entry = (thread_t)queue_first(queue);
2025
2026 while (TRUE) {
2027 if ( queue_end(queue, (queue_entry_t)entry) ||
2028 deadline < entry->realtime.deadline ) {
2029 entry = (thread_t)queue_prev((queue_entry_t)entry);
2030 break;
2031 }
2032
2033 entry = (thread_t)queue_next((queue_entry_t)entry);
2034 }
2035
2036 if ((queue_entry_t)entry == queue)
2037 preempt = TRUE;
2038
2039 insque((queue_entry_t)thread, (queue_entry_t)entry);
2040 }
2041
2042 thread->runq = RT_RUNQ;
2043 rq->count++; rq->urgency++;
2044
2045 simple_unlock(&rt_lock);
2046
2047 return (preempt);
2048 }
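/*
 * Annotation (not in the original source): the queue walk above keeps
 * each priority level sorted by increasing realtime.deadline, so the
 * earliest-deadline thread at a priority is dequeued first; 'preempt'
 * is TRUE only when the new thread lands at the head of the queue.
 */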
2049
2050 /*
2051 * realtime_setrun:
2052 *
2053 * Dispatch a thread for realtime execution.
2054 *
2055 * Thread must be locked. Associated pset must
2056 * be locked, and is returned unlocked.
2057 */
2058 static void
2059 realtime_setrun(
2060 processor_t processor,
2061 thread_t thread)
2062 {
2063 processor_set_t pset = processor->processor_set;
2064
2065 /*
2066 * Dispatch directly onto idle processor.
2067 */
2068 if (processor->state == PROCESSOR_IDLE) {
2069 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2070 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2071
2072 processor->next_thread = thread;
2073 processor->deadline = thread->realtime.deadline;
2074 processor->state = PROCESSOR_DISPATCHING;
2075 pset_unlock(pset);
2076
2077 if (processor != current_processor())
2078 machine_signal_idle(processor);
2079 return;
2080 }
2081
2082 if (realtime_queue_insert(thread)) {
2083 if (processor == current_processor())
2084 ast_on(AST_PREEMPT | AST_URGENT);
2085 else
2086 cause_ast_check(processor);
2087 }
2088
2089 pset_unlock(pset);
2090 }
2091
2092 /*
2093 * processor_enqueue:
2094 *
2095 * Enqueue thread on a processor run queue. Thread must be locked,
2096 * and not already be on a run queue.
2097 *
2098 * Returns TRUE if a preemption is indicated based on the state
2099 * of the run queue.
2100 *
2101 * The run queue must be locked (see run_queue_remove()
2102 * for more info).
2103 */
2104 static boolean_t
2105 processor_enqueue(
2106 processor_t processor,
2107 thread_t thread,
2108 integer_t options)
2109 {
2110 run_queue_t rq = &processor->runq;
2111 queue_t queue = rq->queues + thread->sched_pri;
2112 boolean_t result = FALSE;
2113
2114 if (queue_empty(queue)) {
2115 enqueue_tail(queue, (queue_entry_t)thread);
2116
2117 setbit(MAXPRI - thread->sched_pri, rq->bitmap);
2118 if (thread->sched_pri > rq->highq) {
2119 rq->highq = thread->sched_pri;
2120 result = TRUE;
2121 }
2122 }
2123 else
2124 if (options & SCHED_TAILQ)
2125 enqueue_tail(queue, (queue_entry_t)thread);
2126 else
2127 enqueue_head(queue, (queue_entry_t)thread);
2128
2129 thread->runq = processor;
2130 if (testbit(thread->sched_pri, sched_preempt_pri))
2131 rq->urgency++;
2132 rq->count++;
2133
2134 return (result);
2135 }
2136
2137 /*
2138 * processor_setrun:
2139 *
2140 * Dispatch a thread for execution on a
2141 * processor.
2142 *
2143 * Thread must be locked. Associated pset must
2144 * be locked, and is returned unlocked.
2145 */
2146 static void
2147 processor_setrun(
2148 processor_t processor,
2149 thread_t thread,
2150 integer_t options)
2151 {
2152 processor_set_t pset = processor->processor_set;
2153 ast_t preempt;
2154
2155 /*
2156 * Dispatch directly onto idle processor.
2157 */
2158 if (processor->state == PROCESSOR_IDLE) {
2159 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2160 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2161
2162 processor->next_thread = thread;
2163 processor->deadline = UINT64_MAX;
2164 processor->state = PROCESSOR_DISPATCHING;
2165 pset_unlock(pset);
2166
2167 if (processor != current_processor())
2168 machine_signal_idle(processor);
2169 return;
2170 }
2171
2172 /*
2173 * Set preemption mode.
2174 */
2175 if (testbit(thread->sched_pri, sched_preempt_pri))
2176 preempt = (AST_PREEMPT | AST_URGENT);
2177 else
2178 if (thread->sched_mode & TH_MODE_TIMESHARE && thread->sched_pri < thread->priority)
2179 preempt = AST_NONE;
2180 else
2181 preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
2182
2183 if (!processor_enqueue(processor, thread, options))
2184 preempt = AST_NONE;
2185
2186 if (preempt != AST_NONE) {
2187 if (processor == current_processor()) {
2188 if (csw_check(processor) != AST_NONE)
2189 ast_on(preempt);
2190 }
2191 else
2192 if ( (processor->state == PROCESSOR_RUNNING ||
2193 processor->state == PROCESSOR_SHUTDOWN) &&
2194 thread->sched_pri >= processor->current_pri ) {
2195 cause_ast_check(processor);
2196 }
2197 }
2198 else
2199 if ( processor->state == PROCESSOR_SHUTDOWN &&
2200 thread->sched_pri >= processor->current_pri ) {
2201 cause_ast_check(processor);
2202 }
2203
2204 pset_unlock(pset);
2205 }
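/*
 * Both realtime_setrun() and processor_setrun() share the idle fast
 * path above: pull the processor off the pset's idle queue, stash
 * the thread in next_thread, mark the processor DISPATCHING, and
 * signal it only when it is not the caller.  Reduced to a toy state
 * machine (all toy_* names hypothetical):
 */
#if 0	/* illustrative sketch only -- not part of this file's logic */
enum toy_state { TOY_IDLE, TOY_DISPATCHING, TOY_RUNNING };

struct toy_processor {
	enum toy_state	state;
	void		*next_thread;
	int		is_self;	/* == current_processor()? */
};

static void
toy_signal_idle(struct toy_processor *p)
{
	(void)p;	/* machine_signal_idle() analogue */
}

static void
toy_dispatch_idle(struct toy_processor *p, void *thread)
{
	/* caller holds the pset lock; p was just dequeued from idle */
	p->next_thread = thread;
	p->state = TOY_DISPATCHING;

	if (!p->is_self)
		toy_signal_idle(p);
}
#endif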
2206
2207 #define next_pset(p) (((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets)
2208
2209 /*
2210 * choose_next_pset:
2211 *
2212 * Return the next sibling pset containing
2213 * available processors.
2214 *
2215 * Returns the original pset if none other is
2216 * suitable.
2217 */
2218 static processor_set_t
2219 choose_next_pset(
2220 processor_set_t pset)
2221 {
2222 processor_set_t nset = pset;
2223
2224 do {
2225 nset = next_pset(nset);
2226 } while (nset->processor_count < 1 && nset != pset);
2227
2228 return (nset);
2229 }
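/*
 * next_pset() treats the node's psets as a circular list, and
 * choose_next_pset() walks it at most once around, settling for the
 * original pset when no other set has processors.  The same idiom
 * over a hypothetical circular index space:
 */
#if 0	/* illustrative sketch only -- not part of this file's logic */
static int
choose_next_index(const int *counts, int n, int start)
{
	int i = start;

	do {
		i = (i + 1) % n;	/* next_pset() analogue */
	} while (counts[i] < 1 && i != start);

	return (i);	/* may be 'start' itself if none other suitable */
}
#endif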
2230
2231 /*
2232 * choose_processor:
2233 *
2234 * Choose a processor for the thread, beginning at
2235 * the pset.
2236 *
2237 * Returns a processor, possibly from a different pset.
2238 *
2239 * The thread must be locked. The pset must be locked,
2240 * and the resulting pset is locked on return.
2241 */
2242 static processor_t
2243 choose_processor(
2244 processor_set_t pset,
2245 thread_t thread)
2246 {
2247 processor_set_t nset, cset = pset;
2248 processor_t processor = thread->last_processor;
2249 processor_meta_t pmeta = PROCESSOR_META_NULL;
2250
2251 /*
2252 * Prefer the last processor, when appropriate.
2253 */
2254 if (processor != PROCESSOR_NULL) {
2255 if (thread->sched_pri < BASEPRI_RTQUEUES && processor->processor_meta != PROCESSOR_META_NULL &&
2256 processor->processor_meta->primary->state == PROCESSOR_IDLE)
2257 processor = processor->processor_meta->primary;
2258
2259 if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE ||
2260 processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
2261 processor = PROCESSOR_NULL;
2262 else
2263 if (processor->state == PROCESSOR_IDLE ||
2264 (thread->sched_pri > BASEPRI_DEFAULT && processor->current_pri < thread->sched_pri))
2265 return (processor);
2266 }
2267
2268 /*
2269 * Iterate through the processor sets to locate
2270 * an appropriate processor.
2271 */
2272 do {
2273 /*
2274 * Choose an idle processor.
2275 */
2276 if (!queue_empty(&cset->idle_queue))
2277 return ((processor_t)queue_first(&cset->idle_queue));
2278
2279 if (thread->sched_pri >= BASEPRI_RTQUEUES) {
2280 /*
2281 * For an RT thread, iterate through active processors, first fit.
2282 */
2283 processor = (processor_t)queue_first(&cset->active_queue);
2284 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
2285 if (thread->sched_pri > processor->current_pri ||
2286 thread->realtime.deadline < processor->deadline)
2287 return (processor);
2288
2289 if (pmeta == PROCESSOR_META_NULL) {
2290 if (processor->processor_meta != PROCESSOR_META_NULL &&
2291 !queue_empty(&processor->processor_meta->idle_queue))
2292 pmeta = processor->processor_meta;
2293 }
2294
2295 processor = (processor_t)queue_next((queue_entry_t)processor);
2296 }
2297
2298 if (pmeta != PROCESSOR_META_NULL)
2299 return ((processor_t)queue_first(&pmeta->idle_queue));
2300
2301 processor = PROCESSOR_NULL;
2302 }
2303 else {
2304 /*
2305 * Check the pset's processor hints (lowest priority, lowest run count), if available.
2306 */
2307 if (cset->low_pri != PROCESSOR_NULL && cset->low_pri->state != PROCESSOR_INACTIVE &&
2308 cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE &&
2309 (processor == PROCESSOR_NULL ||
2310 (thread->sched_pri > BASEPRI_DEFAULT && cset->low_pri->current_pri < thread->sched_pri))) {
2311 processor = cset->low_pri;
2312 }
2313 else
2314 if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE &&
2315 cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE &&
2316 (processor == PROCESSOR_NULL || (thread->sched_pri <= BASEPRI_DEFAULT &&
2317 cset->low_count->runq.count < processor->runq.count))) {
2318 processor = cset->low_count;
2319 }
2320
2321 /*
2322 * Otherwise, choose an available processor in the set.
2323 */
2324 if (processor == PROCESSOR_NULL) {
2325 processor = (processor_t)dequeue_head(&cset->active_queue);
2326 if (processor != PROCESSOR_NULL)
2327 enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
2328 }
2329
2330 if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) {
2331 if (processor->processor_meta != PROCESSOR_META_NULL &&
2332 !queue_empty(&processor->processor_meta->idle_queue))
2333 pmeta = processor->processor_meta;
2334 }
2335 }
2336
2337 /*
2338 * Move onto the next processor set.
2339 */
2340 nset = next_pset(cset);
2341
2342 if (nset != pset) {
2343 pset_unlock(cset);
2344
2345 cset = nset;
2346 pset_lock(cset);
2347 }
2348 } while (nset != pset);
2349
2350 /*
2351 * Make sure that we pick a running processor,
2352 * and that the correct processor set is locked.
2353 */
2354 do {
2355 if (pmeta != PROCESSOR_META_NULL) {
2356 if (cset != pmeta->primary->processor_set) {
2357 pset_unlock(cset);
2358
2359 cset = pmeta->primary->processor_set;
2360 pset_lock(cset);
2361 }
2362
2363 if (!queue_empty(&pmeta->idle_queue))
2364 return ((processor_t)queue_first(&pmeta->idle_queue));
2365
2366 pmeta = PROCESSOR_META_NULL;
2367 }
2368
2369 /*
2370 * If we haven't been able to choose a processor,
2371 * pick the boot processor and return it.
2372 */
2373 if (processor == PROCESSOR_NULL) {
2374 processor = master_processor;
2375
2376 /*
2377 * Check that the correct processor set is
2378 * returned locked.
2379 */
2380 if (cset != processor->processor_set) {
2381 pset_unlock(cset);
2382
2383 cset = processor->processor_set;
2384 pset_lock(cset);
2385 }
2386
2387 return (processor);
2388 }
2389
2390 /*
2391 * Check that the processor set for the chosen
2392 * processor is locked.
2393 */
2394 if (cset != processor->processor_set) {
2395 pset_unlock(cset);
2396
2397 cset = processor->processor_set;
2398 pset_lock(cset);
2399 }
2400
2401 /*
2402 * We must verify that the chosen processor is still available.
2403 */
2404 if (processor->state == PROCESSOR_INACTIVE ||
2405 processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
2406 processor = PROCESSOR_NULL;
2407 } while (processor == PROCESSOR_NULL);
2408
2409 return (processor);
2410 }
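/*
 * Note the locking discipline above: the pset lock migrates with the
 * walk -- unlock the current set, lock the next -- so exactly one
 * pset lock is held at any time, and the lock covering the returned
 * processor's set is held on exit.  The hand-off idiom in isolation,
 * with hypothetical pthread-based types:
 */
#if 0	/* illustrative sketch only -- not part of this file's logic */
#include <pthread.h>

struct toy_pset {
	pthread_mutex_t	lock;
	struct toy_pset	*next;	/* circular list of psets */
};

/*
 * Advance from 'cur' (locked) to its successor, preserving the
 * invariant that exactly one pset lock is held.
 */
static struct toy_pset *
toy_advance_locked(struct toy_pset *cur)
{
	struct toy_pset *nxt = cur->next;

	if (nxt != cur) {
		pthread_mutex_unlock(&cur->lock);
		pthread_mutex_lock(&nxt->lock);
	}
	return (nxt);
}
#endif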
2411
2412 /*
2413 * thread_setrun:
2414 *
2415 * Dispatch thread for execution, onto an idle
2416 * processor or run queue, and signal a preemption
2417 * as appropriate.
2418 *
2419 * Thread must be locked.
2420 */
2421 void
2422 thread_setrun(
2423 thread_t thread,
2424 integer_t options)
2425 {
2426 processor_t processor;
2427 processor_set_t pset;
2428
2429 #if DEBUG
2430 assert(thread_runnable(thread));
2431 #endif
2432
2433 /*
2434 * Update priority if needed.
2435 */
2436 if (thread->sched_stamp != sched_tick)
2437 update_priority(thread);
2438
2439 assert(thread->runq == PROCESSOR_NULL);
2440
2441 if (thread->bound_processor == PROCESSOR_NULL) {
2442 /*
2443 * Unbound case.
2444 */
2445 if (thread->affinity_set != AFFINITY_SET_NULL) {
2446 /*
2447 * Use affinity set policy hint.
2448 */
2449 pset = thread->affinity_set->aset_pset;
2450 pset_lock(pset);
2451
2452 processor = choose_processor(pset, thread);
2453 }
2454 else
2455 if (thread->last_processor != PROCESSOR_NULL) {
2456 /*
2457 * Simple (last processor) affinity case.
2458 */
2459 processor = thread->last_processor;
2460 pset = processor->processor_set;
2461 pset_lock(pset);
2462
2463 /*
2464 * Choose a different processor in certain cases.
2465 */
2466 if (thread->sched_pri >= BASEPRI_RTQUEUES) {
2467 /*
2468 * If the processor is executing an RT thread with
2469 * an earlier deadline, choose another.
2470 */
2471 if (thread->sched_pri <= processor->current_pri ||
2472 thread->realtime.deadline >= processor->deadline)
2473 processor = choose_processor(pset, thread);
2474 }
2475 else
2476 processor = choose_processor(pset, thread);
2477 }
2478 else {
2479 /*
2480 * No affinity case:
2481 *
2482 * Utilize a per-task hint to spread threads
2483 * among the available processor sets.
2484 */
2485 task_t task = thread->task;
2486
2487 pset = task->pset_hint;
2488 if (pset == PROCESSOR_SET_NULL)
2489 pset = current_processor()->processor_set;
2490
2491 pset = choose_next_pset(pset);
2492 pset_lock(pset);
2493
2494 processor = choose_processor(pset, thread);
2495 task->pset_hint = processor->processor_set;
2496 }
2497 }
2498 else {
2499 /*
2500 * Bound case:
2501 *
2502 * Unconditionally dispatch on the processor.
2503 */
2504 processor = thread->bound_processor;
2505 pset = processor->processor_set;
2506 pset_lock(pset);
2507 }
2508
2509 /*
2510 * Dispatch the thread on the chosen processor.
2511 */
2512 if (thread->sched_pri >= BASEPRI_RTQUEUES)
2513 realtime_setrun(processor, thread);
2514 else
2515 processor_setrun(processor, thread, options);
2516 }
2517
2518 processor_set_t
2519 task_choose_pset(
2520 task_t task)
2521 {
2522 processor_set_t pset = task->pset_hint;
2523
2524 if (pset != PROCESSOR_SET_NULL)
2525 pset = choose_next_pset(pset);
2526
2527 return (pset);
2528 }
2529
2530 /*
2531 * processor_queue_shutdown:
2532 *
2533 * Shutdown a processor run queue by
2534 * re-dispatching non-bound threads.
2535 *
2536 * Associated pset must be locked, and is
2537 * returned unlocked.
2538 */
2539 void
2540 processor_queue_shutdown(
2541 processor_t processor)
2542 {
2543 processor_set_t pset = processor->processor_set;
2544 run_queue_t rq = &processor->runq;
2545 queue_t queue = rq->queues + rq->highq;
2546 int pri = rq->highq, count = rq->count;
2547 thread_t next, thread;
2548 queue_head_t tqueue;
2549
2550 queue_init(&tqueue);
2551
2552 while (count > 0) {
2553 thread = (thread_t)queue_first(queue);
2554 while (!queue_end(queue, (queue_entry_t)thread)) {
2555 next = (thread_t)queue_next((queue_entry_t)thread);
2556
2557 if (thread->bound_processor == PROCESSOR_NULL) {
2558 remqueue(queue, (queue_entry_t)thread);
2559
2560 thread->runq = PROCESSOR_NULL;
2561 rq->count--;
2562 if (testbit(pri, sched_preempt_pri)) {
2563 rq->urgency--; assert(rq->urgency >= 0);
2564 }
2565 if (queue_empty(queue)) {
2566 if (pri != IDLEPRI)
2567 clrbit(MAXPRI - pri, rq->bitmap);
2568 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2569 }
2570
2571 enqueue_tail(&tqueue, (queue_entry_t)thread);
2572 }
2573 count--;
2574
2575 thread = next;
2576 }
2577
2578 queue--; pri--;
2579 }
2580
2581 pset_unlock(pset);
2582
2583 while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
2584 thread_lock(thread);
2585
2586 thread_setrun(thread, SCHED_TAILQ);
2587
2588 thread_unlock(thread);
2589 }
2590 }
2591
2592 /*
2593 * Check for a preemption point in
2594 * the current context.
2595 *
2596 * Called at splsched.
2597 */
2598 ast_t
2599 csw_check(
2600 processor_t processor)
2601 {
2602 ast_t result = AST_NONE;
2603 run_queue_t runq;
2604
2605 if (first_timeslice(processor)) {
2606 runq = &rt_runq;
2607 if (runq->highq >= BASEPRI_RTQUEUES)
2608 return (AST_PREEMPT | AST_URGENT);
2609
2610 if (runq->highq > processor->current_pri) {
2611 if (runq->urgency > 0)
2612 return (AST_PREEMPT | AST_URGENT);
2613
2614 result |= AST_PREEMPT;
2615 }
2616
2617 runq = &processor->runq;
2618 if (runq->highq > processor->current_pri) {
2619 if (runq->urgency > 0)
2620 return (AST_PREEMPT | AST_URGENT);
2621
2622 result |= AST_PREEMPT;
2623 }
2624 }
2625 else {
2626 runq = &rt_runq;
2627 if (runq->highq >= processor->current_pri) {
2628 if (runq->urgency > 0)
2629 return (AST_PREEMPT | AST_URGENT);
2630
2631 result |= AST_PREEMPT;
2632 }
2633
2634 runq = &processor->runq;
2635 if (runq->highq >= processor->current_pri) {
2636 if (runq->urgency > 0)
2637 return (AST_PREEMPT | AST_URGENT);
2638
2639 result |= AST_PREEMPT;
2640 }
2641 }
2642
2643 if (result != AST_NONE)
2644 return (result);
2645
2646 if (processor->current_pri < BASEPRI_RTQUEUES && processor->processor_meta != PROCESSOR_META_NULL &&
2647 processor->processor_meta->primary != processor)
2648 return (AST_PREEMPT);
2649
2650 if (machine_cpu_is_inactive(processor->cpu_id))
2651 return (AST_PREEMPT);
2652
2653 if (processor->active_thread->state & TH_SUSP)
2654 return (AST_PREEMPT);
2655
2656 return (AST_NONE);
2657 }
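/*
 * csw_check() consults two queues -- the global realtime queue, then
 * the processor's own run queue -- and returns an urgent AST as soon
 * as either holds urgent work at a qualifying priority (the bound
 * relaxes from '>' to '>=' once the first quantum has expired).
 * That precedence, distilled over hypothetical queue summaries:
 */
#if 0	/* illustrative sketch only -- not part of this file's logic */
#define TOY_AST_NONE	0x0
#define TOY_AST_PREEMPT	0x1
#define TOY_AST_URGENT	0x2

struct toy_runq_summary {
	int	highq;		/* highest occupied priority */
	int	urgency;	/* count of urgent threads queued */
};

static int
toy_csw_check(const struct toy_runq_summary *rqs, int nqueues,
    int current_pri)
{
	int result = TOY_AST_NONE;
	int i;

	for (i = 0; i < nqueues; i++) {
		if (rqs[i].highq > current_pri) {
			if (rqs[i].urgency > 0)
				return (TOY_AST_PREEMPT | TOY_AST_URGENT);
			result |= TOY_AST_PREEMPT;
		}
	}

	return (result);
}
#endif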
2658
2659 /*
2660 * set_sched_pri:
2661 *
2662 * Set the scheduled priority of the specified thread.
2663 *
2664 * This may cause the thread to change queues.
2665 *
2666 * Thread must be locked.
2667 */
2668 void
2669 set_sched_pri(
2670 thread_t thread,
2671 int priority)
2672 {
2673 boolean_t removed = run_queue_remove(thread);
2674
2675 thread->sched_pri = priority;
2676 if (removed)
2677 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2678 else
2679 if (thread->state & TH_RUN) {
2680 processor_t processor = thread->last_processor;
2681
2682 if (thread == current_thread()) {
2683 ast_t preempt;
2684
2685 processor->current_pri = priority;
2686 if ((preempt = csw_check(processor)) != AST_NONE)
2687 ast_on(preempt);
2688 }
2689 else
2690 if ( processor != PROCESSOR_NULL &&
2691 processor->active_thread == thread )
2692 cause_ast_check(processor);
2693 }
2694 }
2695
2696 #if 0
2697
2698 static void
2699 run_queue_check(
2700 run_queue_t rq,
2701 thread_t thread)
2702 {
2703 queue_t q;
2704 queue_entry_t qe;
2705
2706 if (rq != thread->runq)
2707 panic("run_queue_check: thread runq");
2708
2709 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
2710 panic("run_queue_check: thread sched_pri");
2711
2712 q = &rq->queues[thread->sched_pri];
2713 qe = queue_first(q);
2714 while (!queue_end(q, qe)) {
2715 if (qe == (queue_entry_t)thread)
2716 return;
2717
2718 qe = queue_next(qe);
2719 }
2720
2721 panic("run_queue_check: end");
2722 }
2723
2724 #endif /* 0 */
2725
2726 /*
2727 * run_queue_remove:
2728 *
2729 * Remove a thread from a current run queue and
2730 * return TRUE if successful.
2731 *
2732 * Thread must be locked.
2733 */
2734 boolean_t
2735 run_queue_remove(
2736 thread_t thread)
2737 {
2738 processor_t processor = thread->runq;
2739
2740 /*
2741 * If processor is PROCESSOR_NULL, the thread will stay out of the
2742 * run queues because the caller locked the thread. Otherwise
2743 * the thread is on a run queue, but may still be chosen for
2744 * dispatch and removed before we can lock that queue.
2745 */
2746 if (processor != PROCESSOR_NULL) {
2747 void * rqlock;
2748 run_queue_t rq;
2749
2750 /*
2751 * The processor run queues are locked by the
2752 * processor set. Real-time priorities use a
2753 * global queue with a dedicated lock.
2754 */
2755 if (thread->sched_pri < BASEPRI_RTQUEUES) {
2756 rqlock = &processor->processor_set->sched_lock;
2757 rq = &processor->runq;
2758 }
2759 else {
2760 rqlock = &rt_lock; rq = &rt_runq;
2761 }
2762
2763 simple_lock(rqlock);
2764
2765 if (processor == thread->runq) {
2766 /*
2767 * Thread is on a run queue and we have a lock on
2768 * that run queue.
2769 */
2770 remqueue(&rq->queues[0], (queue_entry_t)thread);
2771 rq->count--;
2772 if (testbit(thread->sched_pri, sched_preempt_pri)) {
2773 rq->urgency--; assert(rq->urgency >= 0);
2774 }
2775
2776 if (queue_empty(rq->queues + thread->sched_pri)) {
2777 /* update run queue status */
2778 if (thread->sched_pri != IDLEPRI)
2779 clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
2780 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2781 }
2782
2783 thread->runq = PROCESSOR_NULL;
2784 }
2785 else {
2786 /*
2787 * The thread left the run queue before we could
2788 * lock the run queue.
2789 */
2790 assert(thread->runq == PROCESSOR_NULL);
2791 processor = PROCESSOR_NULL;
2792 }
2793
2794 simple_unlock(rqlock);
2795 }
2796
2797 return (processor != PROCESSOR_NULL);
2798 }
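/*
 * The pattern above is "read, lock, revalidate": thread->runq is
 * sampled without the run-queue lock, the matching lock is taken,
 * and the field is re-checked before unlinking, since a dispatch may
 * have dequeued the thread in the window.  The same shape with
 * hypothetical pthread-based types:
 */
#if 0	/* illustrative sketch only -- not part of this file's logic */
#include <stdbool.h>
#include <pthread.h>

struct toy_queue {
	pthread_mutex_t	lock;
	/* ... queue linkage ... */
};

struct toy_item {
	struct toy_queue * volatile owner;	/* NULL when not enqueued */
};

static bool
toy_remove(struct toy_item *item)
{
	struct toy_queue *q = item->owner;

	if (q == NULL)
		return (false);		/* not on any queue */

	pthread_mutex_lock(&q->lock);
	if (item->owner == q) {
		/* still ours: unlink under the lock */
		item->owner = NULL;
		pthread_mutex_unlock(&q->lock);
		return (true);
	}
	pthread_mutex_unlock(&q->lock);
	return (false);		/* raced: dequeued before we locked */
}
#endif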
2799
2800 /*
2801 * steal_processor_thread:
2802 *
2803 * Locate a thread to steal from the processor and
2804 * return it.
2805 *
2806 * Associated pset must be locked. Returns THREAD_NULL
2807 * on failure.
2808 */
2809 static thread_t
2810 steal_processor_thread(
2811 processor_t processor)
2812 {
2813 run_queue_t rq = &processor->runq;
2814 queue_t queue = rq->queues + rq->highq;
2815 int pri = rq->highq, count = rq->count;
2816 thread_t thread;
2817
2818 while (count > 0) {
2819 thread = (thread_t)queue_first(queue);
2820 while (!queue_end(queue, (queue_entry_t)thread)) {
2821 if (thread->bound_processor == PROCESSOR_NULL) {
2822 remqueue(queue, (queue_entry_t)thread);
2823
2824 thread->runq = PROCESSOR_NULL;
2825 rq->count--;
2826 if (testbit(pri, sched_preempt_pri)) {
2827 rq->urgency--; assert(rq->urgency >= 0);
2828 }
2829 if (queue_empty(queue)) {
2830 if (pri != IDLEPRI)
2831 clrbit(MAXPRI - pri, rq->bitmap);
2832 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2833 }
2834
2835 return (thread);
2836 }
2837 count--;
2838
2839 thread = (thread_t)queue_next((queue_entry_t)thread);
2840 }
2841
2842 queue--; pri--;
2843 }
2844
2845 return (THREAD_NULL);
2846 }
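/*
 * The scan above walks the victim's run queue from the highest
 * occupied priority downward and takes the first thread not bound to
 * that processor.  Sketched over plain per-priority singly linked
 * lists (names hypothetical):
 */
#if 0	/* illustrative sketch only -- not part of this file's logic */
#include <stddef.h>
#include <stdbool.h>

struct toy_thread {
	struct toy_thread	*next;
	bool			bound;
};

/* queues[pri] heads a singly linked list per priority level */
static struct toy_thread *
toy_steal(struct toy_thread **queues, int highq)
{
	int pri;

	for (pri = highq; pri >= 0; pri--) {
		struct toy_thread **pp = &queues[pri];

		while (*pp != NULL) {
			if (!(*pp)->bound) {
				struct toy_thread *t = *pp;

				*pp = t->next;	/* unlink */
				return (t);
			}
			pp = &(*pp)->next;
		}
	}

	return (NULL);
}
#endif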
2847
2848 /*
2849 * Locate and steal a thread, beginning
2850 * at the pset.
2851 *
2852 * The pset must be locked, and is returned
2853 * unlocked.
2854 *
2855 * Returns the stolen thread, or THREAD_NULL on
2856 * failure.
2857 */
2858 static thread_t
2859 steal_thread(
2860 processor_set_t pset)
2861 {
2862 processor_set_t nset, cset = pset;
2863 processor_t processor;
2864 thread_t thread;
2865
2866 do {
2867 processor = (processor_t)queue_first(&cset->active_queue);
2868 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
2869 if (processor->runq.count > 0) {
2870 thread = steal_processor_thread(processor);
2871 if (thread != THREAD_NULL) {
2872 remqueue(&cset->active_queue, (queue_entry_t)processor);
2873 enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
2874
2875 pset_unlock(cset);
2876
2877 return (thread);
2878 }
2879 }
2880
2881 processor = (processor_t)queue_next((queue_entry_t)processor);
2882 }
2883
2884 nset = next_pset(cset);
2885
2886 if (nset != pset) {
2887 pset_unlock(cset);
2888
2889 cset = nset;
2890 pset_lock(cset);
2891 }
2892 } while (nset != pset);
2893
2894 pset_unlock(cset);
2895
2896 return (THREAD_NULL);
2897 }
2898
2899 /*
2900 * This is the processor idle loop, which just looks for other threads
2901 * to execute. Processor idle threads invoke this with no current
2902 * thread supplied, in order to idle without an asserted wait state.
2903 *
2904 * Returns the next thread to execute if dispatched directly.
2905 */
2906 static thread_t
2907 processor_idle(
2908 thread_t thread,
2909 processor_t processor)
2910 {
2911 processor_set_t pset = processor->processor_set;
2912 thread_t new_thread;
2913 int state;
2914
2915 (void)splsched();
2916
2917 KERNEL_DEBUG_CONSTANT(
2918 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, (uintptr_t)thread_tid(thread), 0, 0, 0, 0);
2919
2920 timer_switch(&PROCESSOR_DATA(processor, system_state),
2921 mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
2922 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);
2923
2924 while (processor->next_thread == THREAD_NULL && processor->runq.count == 0 && rt_runq.count == 0 &&
2925 (thread == THREAD_NULL || ((thread->state & (TH_WAIT|TH_SUSP)) == TH_WAIT && !thread->wake_active))) {
2926 machine_idle();
2927
2928 (void)splsched();
2929
2930 if (processor->state == PROCESSOR_INACTIVE && !machine_cpu_is_inactive(processor->cpu_id))
2931 break;
2932 }
2933
2934 timer_switch(&PROCESSOR_DATA(processor, idle_state),
2935 mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
2936 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
2937
2938 pset_lock(pset);
2939
2940 state = processor->state;
2941 if (state == PROCESSOR_DISPATCHING) {
2942 /*
2943 * Common case -- cpu dispatched.
2944 */
2945 new_thread = processor->next_thread;
2946 processor->next_thread = THREAD_NULL;
2947 processor->state = PROCESSOR_RUNNING;
2948
2949 if ( processor->runq.highq > new_thread->sched_pri ||
2950 (rt_runq.highq > 0 && rt_runq.highq >= new_thread->sched_pri) ) {
2951 processor->deadline = UINT64_MAX;
2952
2953 pset_unlock(pset);
2954
2955 thread_lock(new_thread);
2956 thread_setrun(new_thread, SCHED_HEADQ);
2957 thread_unlock(new_thread);
2958
2959 KERNEL_DEBUG_CONSTANT(
2960 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
2961
2962 return (THREAD_NULL);
2963 }
2964
2965 pset_unlock(pset);
2966
2967 KERNEL_DEBUG_CONSTANT(
2968 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
2969
2970 return (new_thread);
2971 }
2972 else
2973 if (state == PROCESSOR_IDLE) {
2974 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2975
2976 processor->state = PROCESSOR_RUNNING;
2977 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2978 }
2979 else
2980 if (state == PROCESSOR_INACTIVE) {
2981 processor->state = PROCESSOR_RUNNING;
2982 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2983 }
2984 else
2985 if (state == PROCESSOR_SHUTDOWN) {
2986 /*
2987 * Going off-line. Force a
2988 * reschedule.
2989 */
2990 if ((new_thread = processor->next_thread) != THREAD_NULL) {
2991 processor->next_thread = THREAD_NULL;
2992 processor->deadline = UINT64_MAX;
2993
2994 pset_unlock(pset);
2995
2996 thread_lock(new_thread);
2997 thread_setrun(new_thread, SCHED_HEADQ);
2998 thread_unlock(new_thread);
2999
3000 KERNEL_DEBUG_CONSTANT(
3001 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
3002
3003 return (THREAD_NULL);
3004 }
3005 }
3006
3007 pset_unlock(pset);
3008
3009 KERNEL_DEBUG_CONSTANT(
3010 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
3011
3012 return (THREAD_NULL);
3013 }
3014
3015 /*
3016 * Each processor has a dedicated thread which
3017 * executes the idle loop when there is no suitable
3018 * previous context.
3019 */
3020 void
3021 idle_thread(void)
3022 {
3023 processor_t processor = current_processor();
3024 thread_t new_thread;
3025
3026 new_thread = processor_idle(THREAD_NULL, processor);
3027 if (new_thread != THREAD_NULL) {
3028 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
3029 /*NOTREACHED*/
3030 }
3031
3032 thread_block((thread_continue_t)idle_thread);
3033 /*NOTREACHED*/
3034 }
3035
3036 kern_return_t
3037 idle_thread_create(
3038 processor_t processor)
3039 {
3040 kern_return_t result;
3041 thread_t thread;
3042 spl_t s;
3043
3044 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
3045 if (result != KERN_SUCCESS)
3046 return (result);
3047
3048 s = splsched();
3049 thread_lock(thread);
3050 thread->bound_processor = processor;
3051 processor->idle_thread = thread;
3052 thread->sched_pri = thread->priority = IDLEPRI;
3053 thread->state = (TH_RUN | TH_IDLE);
3054 thread_unlock(thread);
3055 splx(s);
3056
3057 thread_deallocate(thread);
3058
3059 return (KERN_SUCCESS);
3060 }
3061
3062 static uint64_t sched_tick_deadline;
3063
3064 /*
3065 * sched_startup:
3066 *
3067 * Kicks off scheduler services.
3068 *
3069 * Called at splsched.
3070 */
3071 void
3072 sched_startup(void)
3073 {
3074 kern_return_t result;
3075 thread_t thread;
3076
3077 result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
3078 if (result != KERN_SUCCESS)
3079 panic("sched_startup");
3080
3081 thread_deallocate(thread);
3082
3083 /*
3084 * Yield to the sched_tick_thread while it times
3085 * a series of context switches back. It stores
3086 * the baseline value in sched_cswtime.
3087 *
3088 * The current thread is the only other thread
3089 * active at this point.
3090 */
3091 while (sched_cswtime == 0)
3092 thread_block(THREAD_CONTINUE_NULL);
3093
3094 thread_daemon_init();
3095
3096 thread_call_initialize();
3097 }
3098
3099 /*
3100 * sched_tick_continue:
3101 *
3102 * Perform periodic bookkeeping functions about ten
3103 * times per second.
3104 */
3105 static void
3106 sched_tick_continue(void)
3107 {
3108 uint64_t abstime = mach_absolute_time();
3109
3110 sched_tick++;
3111
3112 /*
3113 * Compute various averages.
3114 */
3115 compute_averages();
3116
3117 /*
3118 * Scan the run queues for threads which
3119 * may need to be updated.
3120 */
3121 thread_update_scan();
3122
3123 clock_deadline_for_periodic_event(sched_tick_interval, abstime,
3124 &sched_tick_deadline);
3125
3126 assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
3127 thread_block((thread_continue_t)sched_tick_continue);
3128 /*NOTREACHED*/
3129 }
3130
3131 /*
3132 * Time a series of context switches to determine
3133 * a baseline. Toss the high and low and return
3134 * the one-way value.
3135 */
3136 static uint32_t
3137 time_cswitch(void)
3138 {
3139 uint32_t new, hi, low, accum;
3140 uint64_t abstime;
3141 int i, tries = 7;
3142
3143 accum = hi = low = 0;
3144 for (i = 0; i < tries; ++i) {
3145 abstime = mach_absolute_time();
3146 thread_block(THREAD_CONTINUE_NULL);
3147
3148 new = (uint32_t)(mach_absolute_time() - abstime);
3149
3150 if (i == 0)
3151 accum = hi = low = new;
3152 else {
3153 if (new < low)
3154 low = new;
3155 else
3156 if (new > hi)
3157 hi = new;
3158 accum += new;
3159 }
3160 }
3161
3162 return ((accum - hi - low) / (2 * (tries - 2)));
3163 }
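/*
 * Worked example: with tries == 7 the accumulator holds seven
 * round-trip samples; tossing the high and low leaves five round
 * trips, i.e. ten one-way switches, hence the divisor
 * 2 * (tries - 2) == 10.  Hypothetical samples {12,10,11,30,10,11,9}
 * give accum == 93, hi == 30, low == 9, so the result is
 * (93 - 30 - 9) / 10 == 5 absolute-time units per one-way switch
 * (integer division).
 */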
3164
3165 void
3166 sched_tick_thread(void)
3167 {
3168 sched_cswtime = time_cswitch();
3169
3170 sched_tick_deadline = mach_absolute_time();
3171
3172 sched_tick_continue();
3173 /*NOTREACHED*/
3174 }
3175
3176 /*
3177 * thread_update_scan / runq_scan:
3178 *
3179 * Scan the run queues to account for timesharing threads
3180 * which need to be updated.
3181 *
3182 * Scanner runs in two passes. Pass one squirrels likely
3183 * threads away in an array, pass two does the update.
3184 *
3185 * This is necessary because the run queue is locked for
3186 * the candidate scan, but the thread is locked for the update.
3187 *
3188 * Array should be sized to make forward progress, without
3189 * disabling preemption for long periods.
3190 */
3191
3192 #define THREAD_UPDATE_SIZE 128
3193
3194 static thread_t thread_update_array[THREAD_UPDATE_SIZE];
3195 static int thread_update_count = 0;
3196
3197 /*
3198 * Scan a runq for candidate threads.
3199 *
3200 * Returns TRUE if retry is needed.
3201 */
3202 static boolean_t
3203 runq_scan(
3204 run_queue_t runq)
3205 {
3206 register int count;
3207 register queue_t q;
3208 register thread_t thread;
3209
3210 if ((count = runq->count) > 0) {
3211 q = runq->queues + runq->highq;
3212 while (count > 0) {
3213 queue_iterate(q, thread, thread_t, links) {
3214 if ( thread->sched_stamp != sched_tick &&
3215 (thread->sched_mode & TH_MODE_TIMESHARE) ) {
3216 if (thread_update_count == THREAD_UPDATE_SIZE)
3217 return (TRUE);
3218
3219 thread_update_array[thread_update_count++] = thread;
3220 thread_reference_internal(thread);
3221 }
3222
3223 count--;
3224 }
3225
3226 q--;
3227 }
3228 }
3229
3230 return (FALSE);
3231 }
3232
3233 static void
3234 thread_update_scan(void)
3235 {
3236 boolean_t restart_needed = FALSE;
3237 processor_t processor = processor_list;
3238 processor_set_t pset;
3239 thread_t thread;
3240 spl_t s;
3241
3242 do {
3243 do {
3244 pset = processor->processor_set;
3245
3246 s = splsched();
3247 pset_lock(pset);
3248
3249 restart_needed = runq_scan(&processor->runq);
3250
3251 pset_unlock(pset);
3252 splx(s);
3253
3254 if (restart_needed)
3255 break;
3256
3257 thread = processor->idle_thread;
3258 if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
3259 if (thread_update_count == THREAD_UPDATE_SIZE) {
3260 restart_needed = TRUE;
3261 break;
3262 }
3263
3264 thread_update_array[thread_update_count++] = thread;
3265 thread_reference_internal(thread);
3266 }
3267 } while ((processor = processor->processor_list) != NULL);
3268
3269 /*
3270 * Ok, we now have a collection of candidates -- fix them.
3271 */
3272 while (thread_update_count > 0) {
3273 thread = thread_update_array[--thread_update_count];
3274 thread_update_array[thread_update_count] = THREAD_NULL;
3275
3276 s = splsched();
3277 thread_lock(thread);
3278 if ( !(thread->state & (TH_WAIT|TH_SUSP)) &&
3279 thread->sched_stamp != sched_tick )
3280 update_priority(thread);
3281 thread_unlock(thread);
3282 splx(s);
3283
3284 thread_deallocate(thread);
3285 }
3286 } while (restart_needed);
3287 }
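/*
 * The two-pass shape described above -- harvest candidates into a
 * bounded array under the coarse run-queue lock, then update each
 * one under its own thread lock, rescanning if the array filled --
 * reduces to the following skeleton (all toy_* names hypothetical):
 */
#if 0	/* illustrative sketch only -- not part of this file's logic */
#include <stdbool.h>

#define TOY_BATCH	128

static int	toy_batch[TOY_BATCH];
static int	toy_count;

/* stand-ins for the locked collect/update steps */
static bool	toy_collect(void) { return (false); }	/* true => batch filled */
static void	toy_update(int item) { (void)item; }

static void
toy_two_pass_scan(void)
{
	bool restart;

	do {
		restart = toy_collect();	/* pass one: coarse lock held */

		while (toy_count > 0)		/* pass two: per-item locks */
			toy_update(toy_batch[--toy_count]);
	} while (restart);
}
#endif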
3288
3289 /*
3290 * Just in case someone doesn't use the macro
3291 */
3292 #undef thread_wakeup
3293 void
3294 thread_wakeup(
3295 event_t x);
3296
3297 void
3298 thread_wakeup(
3299 event_t x)
3300 {
3301 thread_wakeup_with_result(x, THREAD_AWAKENED);
3302 }
3303
3304 boolean_t
3305 preemption_enabled(void)
3306 {
3307 return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
3308 }
3309
3310 #if DEBUG
3311 static boolean_t
3312 thread_runnable(
3313 thread_t thread)
3314 {
3315 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
3316 }
3317 #endif /* DEBUG */
3318
3319 #if MACH_KDB
3320 #include <ddb/db_output.h>
3321 #define printf kdbprintf
3322 void db_sched(void);
3323
3324 void
3325 db_sched(void)
3326 {
3327 iprintf("Scheduling Statistics:\n");
3328 db_indent += 2;
3329 iprintf("Thread invocations: csw %d same %d\n",
3330 c_thread_invoke_csw, c_thread_invoke_same);
3331 #if MACH_COUNTERS
3332 iprintf("Thread block: calls %d\n",
3333 c_thread_block_calls);
3334 iprintf("Idle thread:\n\thandoff %d block %d\n",
3335 c_idle_thread_handoff,
3336 c_idle_thread_block);
3337 iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
3338 #endif /* MACH_COUNTERS */
3339 db_indent -= 2;
3340 }
3341
3342 #include <ddb/db_output.h>
3343 void db_show_thread_log(void);
3344
3345 void
3346 db_show_thread_log(void)
3347 {
3348 }
3349 #endif /* MACH_KDB */