]> git.saurik.com Git - apple/xnu.git/blame - osfmk/kern/sched_prim.c
xnu-1228.tar.gz
[apple/xnu.git] / osfmk / kern / sched_prim.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: sched_prim.c
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1986
62 *
63 * Scheduling primitives
64 *
65 */
66
67#include <debug.h>
1c79356b 68#include <mach_kdb.h>
1c79356b
A
69
70#include <ddb/db_output.h>
91447636
A
71
72#include <mach/mach_types.h>
1c79356b 73#include <mach/machine.h>
91447636
A
74#include <mach/policy.h>
75#include <mach/sync_policy.h>
76
1c79356b
A
77#include <machine/machine_routines.h>
78#include <machine/sched_param.h>
0c530ab8 79#include <machine/machine_cpu.h>
91447636
A
80
81#include <kern/kern_types.h>
1c79356b
A
82#include <kern/clock.h>
83#include <kern/counters.h>
84#include <kern/cpu_number.h>
85#include <kern/cpu_data.h>
91447636 86#include <kern/debug.h>
1c79356b
A
87#include <kern/lock.h>
88#include <kern/macro_help.h>
89#include <kern/machine.h>
90#include <kern/misc_protos.h>
91#include <kern/processor.h>
92#include <kern/queue.h>
93#include <kern/sched.h>
94#include <kern/sched_prim.h>
95#include <kern/syscall_subr.h>
96#include <kern/task.h>
97#include <kern/thread.h>
91447636
A
98#include <kern/wait_queue.h>
99
1c79356b
A
100#include <vm/pmap.h>
101#include <vm/vm_kern.h>
102#include <vm/vm_map.h>
91447636 103
1c79356b
A
104#include <sys/kdebug.h>
105
0c530ab8 106#include <kern/pms.h>
3a60a9f5 107
2d21ac55
A
108struct run_queue rt_runq;
109#define RT_RUNQ ((processor_t)-1)
110decl_simple_lock_data(static,rt_lock);
111
0b4e3aa0 112#define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
1c79356b
A
113int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
114
0b4e3aa0
A
115#define MAX_UNSAFE_QUANTA 800
116int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
117
118#define MAX_POLL_QUANTA 2
119int max_poll_quanta = MAX_POLL_QUANTA;
120
121#define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
122int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
123
55e303ae
A
124uint64_t max_unsafe_computation;
125uint32_t sched_safe_duration;
126uint64_t max_poll_computation;
127
128uint32_t std_quantum;
129uint32_t min_std_quantum;
130
91447636
A
131uint32_t std_quantum_us;
132
55e303ae
A
133uint32_t max_rt_quantum;
134uint32_t min_rt_quantum;
135
91447636
A
136uint32_t sched_cswtime;
137
1c79356b 138unsigned sched_tick;
91447636 139uint32_t sched_tick_interval;
1c79356b 140
2d21ac55
A
141uint32_t sched_pri_shift = INT8_MAX;
142uint32_t sched_fixed_shift;
143
144uint32_t sched_run_count, sched_share_count;
145uint32_t sched_load_average, sched_mach_factor;
146
147void (*pm_tick_callout)(void) = NULL;
1c79356b
A
148
149/* Forwards */
2d21ac55
A
150void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
151
152static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
153
154static thread_t thread_select_idle(
155 thread_t thread,
156 processor_t processor);
1c79356b 157
2d21ac55
A
158static thread_t processor_idle(
159 thread_t thread,
160 processor_t processor);
91447636 161
55e303ae 162static thread_t choose_thread(
2d21ac55
A
163 processor_t processor);
164
165static thread_t steal_thread(
55e303ae 166 processor_t processor);
1c79356b 167
91447636 168static void thread_update_scan(void);
1c79356b 169
2d21ac55
A
170#if DEBUG
171extern int debug_task;
172#define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
173#else
174#define TLOG(a, fmt, args...) do {} while (0)
175#endif
176
1c79356b 177#if DEBUG
0b4e3aa0 178static
1c79356b
A
179boolean_t thread_runnable(
180 thread_t thread);
181
0b4e3aa0
A
182#endif /*DEBUG*/
183
184
2d21ac55 185
1c79356b
A
186/*
187 * State machine
188 *
189 * states are combinations of:
190 * R running
191 * W waiting (or on wait queue)
192 * N non-interruptible
193 * O swapped out
194 * I being swapped in
195 *
196 * init action
197 * assert_wait thread_block clear_wait swapout swapin
198 *
199 * R RW, RWN R; setrun - -
200 * RN RWN RN; setrun - -
201 *
202 * RW W R -
203 * RWN WN RN -
204 *
205 * W R; setrun WO
206 * WN RN; setrun -
207 *
208 * RO - - R
209 *
210 */
211
212/*
213 * Waiting protocols and implementation:
214 *
215 * Each thread may be waiting for exactly one event; this event
216 * is set using assert_wait(). That thread may be awakened either
217 * by performing a thread_wakeup_prim() on its event,
218 * or by directly waking that thread up with clear_wait().
219 *
220 * The implementation of wait events uses a hash table. Each
221 * bucket is queue of threads having the same hash function
222 * value; the chain for the queue (linked list) is the run queue
223 * field. [It is not possible to be waiting and runnable at the
224 * same time.]
225 *
226 * Locks on both the thread and on the hash buckets govern the
227 * wait event field and the queue chain field. Because wakeup
228 * operations only have the event as an argument, the event hash
229 * bucket must be locked before any thread.
230 *
231 * Scheduling operations may also occur at interrupt level; therefore,
232 * interrupts below splsched() must be prevented when holding
233 * thread or hash bucket locks.
234 *
235 * The wait event hash table declarations are as follows:
236 */
237
238#define NUMQUEUES 59
239
240struct wait_queue wait_queues[NUMQUEUES];
241
242#define wait_hash(event) \
243 ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
244
91447636
A
245int8_t sched_load_shifts[NRQS];
246
1c79356b
A
247void
248sched_init(void)
249{
250 /*
0b4e3aa0
A
251 * Calculate the timeslicing quantum
252 * in us.
1c79356b
A
253 */
254 if (default_preemption_rate < 1)
255 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
0b4e3aa0 256 std_quantum_us = (1000 * 1000) / default_preemption_rate;
1c79356b 257
0b4e3aa0 258 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
1c79356b 259
55e303ae
A
260 sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
261 (1 << SCHED_TICK_SHIFT);
262
1c79356b 263 wait_queues_init();
91447636 264 load_shift_init();
2d21ac55
A
265 simple_lock_init(&rt_lock, 0);
266 run_queue_init(&rt_runq);
1c79356b 267 sched_tick = 0;
1c79356b 268 ast_init();
1c79356b
A
269}
270
55e303ae
A
271void
272sched_timebase_init(void)
273{
91447636
A
274 uint64_t abstime;
275 uint32_t shift;
55e303ae 276
91447636 277 /* standard timeslicing quantum */
55e303ae
A
278 clock_interval_to_absolutetime_interval(
279 std_quantum_us, NSEC_PER_USEC, &abstime);
280 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
281 std_quantum = abstime;
282
91447636 283 /* smallest remaining quantum (250 us) */
55e303ae
A
284 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
285 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
286 min_std_quantum = abstime;
287
91447636 288 /* smallest rt computaton (50 us) */
55e303ae
A
289 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
290 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
291 min_rt_quantum = abstime;
292
91447636 293 /* maximum rt computation (50 ms) */
55e303ae
A
294 clock_interval_to_absolutetime_interval(
295 50, 1000*NSEC_PER_USEC, &abstime);
296 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
297 max_rt_quantum = abstime;
298
91447636
A
299 /* scheduler tick interval */
300 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
301 NSEC_PER_USEC, &abstime);
55e303ae
A
302 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
303 sched_tick_interval = abstime;
304
91447636
A
305 /*
306 * Compute conversion factor from usage to
307 * timesharing priorities with 5/8 ** n aging.
308 */
309 abstime = (abstime * 5) / 3;
310 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
311 abstime >>= 1;
2d21ac55 312 sched_fixed_shift = shift;
91447636 313
55e303ae
A
314 max_unsafe_computation = max_unsafe_quanta * std_quantum;
315 max_poll_computation = max_poll_quanta * std_quantum;
316}
317
1c79356b
A
318void
319wait_queues_init(void)
320{
321 register int i;
322
323 for (i = 0; i < NUMQUEUES; i++) {
324 wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
325 }
326}
327
91447636
A
328/*
329 * Set up values for timeshare
330 * loading factors.
331 */
332static void
333load_shift_init(void)
334{
335 int8_t k, *p = sched_load_shifts;
336 uint32_t i, j;
337
338 *p++ = INT8_MIN; *p++ = 0;
339
340 for (i = j = 2, k = 1; i < NRQS; ++k) {
341 for (j <<= 1; i < j; ++i)
342 *p++ = k;
343 }
344}
345
1c79356b 346/*
0b4e3aa0 347 * Thread wait timer expiration.
1c79356b
A
348 */
349void
350thread_timer_expire(
91447636
A
351 void *p0,
352 __unused void *p1)
1c79356b
A
353{
354 thread_t thread = p0;
355 spl_t s;
356
357 s = splsched();
55e303ae 358 thread_lock(thread);
91447636 359 if (--thread->wait_timer_active == 0) {
0b4e3aa0
A
360 if (thread->wait_timer_is_set) {
361 thread->wait_timer_is_set = FALSE;
55e303ae 362 clear_wait_internal(thread, THREAD_TIMED_OUT);
0b4e3aa0 363 }
1c79356b 364 }
55e303ae 365 thread_unlock(thread);
1c79356b
A
366 splx(s);
367}
368
369/*
370 * thread_set_timer:
371 *
372 * Set a timer for the current thread, if the thread
373 * is ready to wait. Must be called between assert_wait()
374 * and thread_block().
375 */
376void
377thread_set_timer(
0b4e3aa0
A
378 uint32_t interval,
379 uint32_t scale_factor)
1c79356b
A
380{
381 thread_t thread = current_thread();
0b4e3aa0 382 uint64_t deadline;
1c79356b
A
383 spl_t s;
384
385 s = splsched();
1c79356b
A
386 thread_lock(thread);
387 if ((thread->state & TH_WAIT) != 0) {
388 clock_interval_to_deadline(interval, scale_factor, &deadline);
91447636
A
389 if (!timer_call_enter(&thread->wait_timer, deadline))
390 thread->wait_timer_active++;
1c79356b
A
391 thread->wait_timer_is_set = TRUE;
392 }
393 thread_unlock(thread);
1c79356b
A
394 splx(s);
395}
396
397void
398thread_set_timer_deadline(
0b4e3aa0 399 uint64_t deadline)
1c79356b
A
400{
401 thread_t thread = current_thread();
402 spl_t s;
403
404 s = splsched();
1c79356b
A
405 thread_lock(thread);
406 if ((thread->state & TH_WAIT) != 0) {
91447636
A
407 if (!timer_call_enter(&thread->wait_timer, deadline))
408 thread->wait_timer_active++;
1c79356b
A
409 thread->wait_timer_is_set = TRUE;
410 }
411 thread_unlock(thread);
1c79356b
A
412 splx(s);
413}
414
415void
416thread_cancel_timer(void)
417{
418 thread_t thread = current_thread();
419 spl_t s;
420
421 s = splsched();
55e303ae 422 thread_lock(thread);
1c79356b
A
423 if (thread->wait_timer_is_set) {
424 if (timer_call_cancel(&thread->wait_timer))
425 thread->wait_timer_active--;
426 thread->wait_timer_is_set = FALSE;
427 }
55e303ae 428 thread_unlock(thread);
1c79356b
A
429 splx(s);
430}
431
1c79356b 432/*
91447636
A
433 * thread_unblock:
434 *
435 * Unblock thread on wake up.
436 *
437 * Returns TRUE if the thread is still running.
438 *
439 * Thread must be locked.
1c79356b 440 */
91447636
A
441boolean_t
442thread_unblock(
443 thread_t thread,
444 wait_result_t wresult)
1c79356b 445{
91447636 446 boolean_t result = FALSE;
0b4e3aa0 447
91447636 448 /*
2d21ac55 449 * Set wait_result.
91447636
A
450 */
451 thread->wait_result = wresult;
1c79356b 452
91447636 453 /*
2d21ac55 454 * Cancel pending wait timer.
91447636 455 */
1c79356b
A
456 if (thread->wait_timer_is_set) {
457 if (timer_call_cancel(&thread->wait_timer))
458 thread->wait_timer_active--;
459 thread->wait_timer_is_set = FALSE;
460 }
461
91447636 462 /*
2d21ac55
A
463 * Update scheduling state: not waiting,
464 * set running.
91447636
A
465 */
466 thread->state &= ~(TH_WAIT|TH_UNINT);
1c79356b 467
91447636
A
468 if (!(thread->state & TH_RUN)) {
469 thread->state |= TH_RUN;
1c79356b 470
2d21ac55 471 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
1c79356b 472
91447636 473 /*
2d21ac55 474 * Update run counts.
91447636 475 */
2d21ac55 476 sched_run_incr();
91447636 477 if (thread->sched_mode & TH_MODE_TIMESHARE)
2d21ac55 478 sched_share_incr();
1c79356b 479 }
2d21ac55
A
480 else {
481 /*
482 * Signal if idling on another processor.
483 */
484 if (thread->state & TH_IDLE) {
485 processor_t processor = thread->last_processor;
486
487 if (processor != current_processor())
488 machine_signal_idle(processor);
489 }
490
91447636 491 result = TRUE;
2d21ac55 492 }
1c79356b 493
91447636
A
494 /*
495 * Calculate deadline for real-time threads.
496 */
497 if (thread->sched_mode & TH_MODE_REALTIME) {
498 thread->realtime.deadline = mach_absolute_time();
499 thread->realtime.deadline += thread->realtime.constraint;
0b4e3aa0
A
500 }
501
91447636
A
502 /*
503 * Clear old quantum, fail-safe computation, etc.
504 */
505 thread->current_quantum = 0;
506 thread->computation_metered = 0;
507 thread->reason = AST_NONE;
1c79356b 508
91447636
A
509 KERNEL_DEBUG_CONSTANT(
510 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
511 (int)thread, (int)thread->sched_pri, 0, 0, 0);
512
513 return (result);
1c79356b
A
514}
515
516/*
91447636 517 * Routine: thread_go
1c79356b 518 * Purpose:
91447636 519 * Unblock and dispatch thread.
1c79356b
A
520 * Conditions:
521 * thread lock held, IPC locks may be held.
522 * thread must have been pulled from wait queue under same lock hold.
9bccf70c
A
523 * Returns:
524 * KERN_SUCCESS - Thread was set running
525 * KERN_NOT_WAITING - Thread was not waiting
1c79356b 526 */
9bccf70c 527kern_return_t
91447636 528thread_go(
1c79356b 529 thread_t thread,
55e303ae 530 wait_result_t wresult)
1c79356b 531{
1c79356b 532 assert(thread->at_safe_point == FALSE);
9bccf70c 533 assert(thread->wait_event == NO_EVENT64);
1c79356b
A
534 assert(thread->wait_queue == WAIT_QUEUE_NULL);
535
9bccf70c 536 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
91447636 537 if (!thread_unblock(thread, wresult))
55e303ae 538 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
55e303ae
A
539
540 return (KERN_SUCCESS);
1c79356b 541 }
55e303ae
A
542
543 return (KERN_NOT_WAITING);
1c79356b
A
544}
545
9bccf70c
A
546/*
547 * Routine: thread_mark_wait_locked
548 * Purpose:
549 * Mark a thread as waiting. If, given the circumstances,
550 * it doesn't want to wait (i.e. already aborted), then
551 * indicate that in the return value.
552 * Conditions:
553 * at splsched() and thread is locked.
554 */
555__private_extern__
556wait_result_t
1c79356b 557thread_mark_wait_locked(
9bccf70c
A
558 thread_t thread,
559 wait_interrupt_t interruptible)
1c79356b 560{
55e303ae 561 boolean_t at_safe_point;
1c79356b 562
9bccf70c
A
563 /*
564 * The thread may have certain types of interrupts/aborts masked
565 * off. Even if the wait location says these types of interrupts
566 * are OK, we have to honor mask settings (outer-scoped code may
567 * not be able to handle aborts at the moment).
568 */
91447636
A
569 if (interruptible > (thread->options & TH_OPT_INTMASK))
570 interruptible = thread->options & TH_OPT_INTMASK;
9bccf70c
A
571
572 at_safe_point = (interruptible == THREAD_ABORTSAFE);
573
55e303ae 574 if ( interruptible == THREAD_UNINT ||
2d21ac55 575 !(thread->sched_mode & TH_MODE_ABORT) ||
55e303ae 576 (!at_safe_point &&
2d21ac55 577 (thread->sched_mode & TH_MODE_ABORTSAFELY))) {
9bccf70c
A
578 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
579 thread->at_safe_point = at_safe_point;
9bccf70c 580 return (thread->wait_result = THREAD_WAITING);
9bccf70c 581 }
55e303ae 582 else
2d21ac55
A
583 if (thread->sched_mode & TH_MODE_ABORTSAFELY)
584 thread->sched_mode &= ~TH_MODE_ISABORTED;
55e303ae 585
9bccf70c 586 return (thread->wait_result = THREAD_INTERRUPTED);
1c79356b
A
587}
588
9bccf70c
A
589/*
590 * Routine: thread_interrupt_level
591 * Purpose:
592 * Set the maximum interruptible state for the
593 * current thread. The effective value of any
594 * interruptible flag passed into assert_wait
595 * will never exceed this.
596 *
597 * Useful for code that must not be interrupted,
598 * but which calls code that doesn't know that.
599 * Returns:
600 * The old interrupt level for the thread.
601 */
602__private_extern__
603wait_interrupt_t
604thread_interrupt_level(
605 wait_interrupt_t new_level)
606{
607 thread_t thread = current_thread();
91447636 608 wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
1c79356b 609
91447636 610 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
1c79356b 611
91447636 612 return result;
1c79356b
A
613}
614
615/*
616 * Check to see if an assert wait is possible, without actually doing one.
617 * This is used by debug code in locks and elsewhere to verify that it is
618 * always OK to block when trying to take a blocking lock (since waiting
619 * for the actual assert_wait to catch the case may make it hard to detect
620 * this case.
621 */
622boolean_t
623assert_wait_possible(void)
624{
625
626 thread_t thread;
1c79356b
A
627
628#if DEBUG
629 if(debug_mode) return TRUE; /* Always succeed in debug mode */
630#endif
631
632 thread = current_thread();
633
634 return (thread == NULL || wait_queue_assert_possible(thread));
635}
636
637/*
638 * assert_wait:
639 *
640 * Assert that the current thread is about to go to
641 * sleep until the specified event occurs.
642 */
9bccf70c 643wait_result_t
1c79356b
A
644assert_wait(
645 event_t event,
9bccf70c 646 wait_interrupt_t interruptible)
1c79356b
A
647{
648 register wait_queue_t wq;
649 register int index;
650
651 assert(event != NO_EVENT);
1c79356b
A
652
653 index = wait_hash(event);
654 wq = &wait_queues[index];
91447636 655 return wait_queue_assert_wait(wq, event, interruptible, 0);
9bccf70c
A
656}
657
91447636
A
658wait_result_t
659assert_wait_timeout(
660 event_t event,
661 wait_interrupt_t interruptible,
662 uint32_t interval,
663 uint32_t scale_factor)
55e303ae 664{
91447636
A
665 thread_t thread = current_thread();
666 wait_result_t wresult;
667 wait_queue_t wqueue;
668 uint64_t deadline;
669 spl_t s;
670
55e303ae 671 assert(event != NO_EVENT);
91447636
A
672 wqueue = &wait_queues[wait_hash(event)];
673
674 s = splsched();
675 wait_queue_lock(wqueue);
676 thread_lock(thread);
677
678 clock_interval_to_deadline(interval, scale_factor, &deadline);
679 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
680 interruptible, deadline, thread);
681
682 thread_unlock(thread);
683 wait_queue_unlock(wqueue);
684 splx(s);
55e303ae 685
91447636 686 return (wresult);
55e303ae
A
687}
688
689wait_result_t
91447636 690assert_wait_deadline(
55e303ae 691 event_t event,
91447636
A
692 wait_interrupt_t interruptible,
693 uint64_t deadline)
55e303ae
A
694{
695 thread_t thread = current_thread();
91447636
A
696 wait_result_t wresult;
697 wait_queue_t wqueue;
55e303ae
A
698 spl_t s;
699
700 assert(event != NO_EVENT);
91447636 701 wqueue = &wait_queues[wait_hash(event)];
55e303ae
A
702
703 s = splsched();
91447636 704 wait_queue_lock(wqueue);
55e303ae
A
705 thread_lock(thread);
706
91447636
A
707 wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
708 interruptible, deadline, thread);
55e303ae
A
709
710 thread_unlock(thread);
91447636 711 wait_queue_unlock(wqueue);
55e303ae
A
712 splx(s);
713
714 return (wresult);
715}
9bccf70c
A
716
717/*
718 * thread_sleep_fast_usimple_lock:
719 *
720 * Cause the current thread to wait until the specified event
721 * occurs. The specified simple_lock is unlocked before releasing
722 * the cpu and re-acquired as part of waking up.
723 *
724 * This is the simple lock sleep interface for components that use a
725 * faster version of simple_lock() than is provided by usimple_lock().
726 */
727__private_extern__ wait_result_t
728thread_sleep_fast_usimple_lock(
729 event_t event,
730 simple_lock_t lock,
731 wait_interrupt_t interruptible)
732{
733 wait_result_t res;
734
735 res = assert_wait(event, interruptible);
736 if (res == THREAD_WAITING) {
737 simple_unlock(lock);
738 res = thread_block(THREAD_CONTINUE_NULL);
739 simple_lock(lock);
740 }
741 return res;
1c79356b
A
742}
743
9bccf70c
A
744
745/*
746 * thread_sleep_usimple_lock:
747 *
748 * Cause the current thread to wait until the specified event
749 * occurs. The specified usimple_lock is unlocked before releasing
750 * the cpu and re-acquired as part of waking up.
751 *
752 * This is the simple lock sleep interface for components where
753 * simple_lock() is defined in terms of usimple_lock().
754 */
755wait_result_t
756thread_sleep_usimple_lock(
757 event_t event,
758 usimple_lock_t lock,
759 wait_interrupt_t interruptible)
760{
761 wait_result_t res;
762
763 res = assert_wait(event, interruptible);
764 if (res == THREAD_WAITING) {
765 usimple_unlock(lock);
766 res = thread_block(THREAD_CONTINUE_NULL);
767 usimple_lock(lock);
768 }
769 return res;
770}
771
772/*
773 * thread_sleep_mutex:
774 *
775 * Cause the current thread to wait until the specified event
776 * occurs. The specified mutex is unlocked before releasing
777 * the cpu. The mutex will be re-acquired before returning.
778 *
779 * JMM - Add hint to make sure mutex is available before rousting
780 */
781wait_result_t
782thread_sleep_mutex(
783 event_t event,
784 mutex_t *mutex,
785 wait_interrupt_t interruptible)
786{
787 wait_result_t res;
788
789 res = assert_wait(event, interruptible);
790 if (res == THREAD_WAITING) {
791 mutex_unlock(mutex);
792 res = thread_block(THREAD_CONTINUE_NULL);
793 mutex_lock(mutex);
794 }
795 return res;
796}
1c79356b 797
9bccf70c
A
798/*
799 * thread_sleep_mutex_deadline:
800 *
801 * Cause the current thread to wait until the specified event
802 * (or deadline) occurs. The specified mutex is unlocked before
803 * releasing the cpu. The mutex will be re-acquired before returning.
9bccf70c
A
804 */
805wait_result_t
806thread_sleep_mutex_deadline(
807 event_t event,
808 mutex_t *mutex,
809 uint64_t deadline,
810 wait_interrupt_t interruptible)
811{
812 wait_result_t res;
813
91447636 814 res = assert_wait_deadline(event, interruptible, deadline);
9bccf70c
A
815 if (res == THREAD_WAITING) {
816 mutex_unlock(mutex);
9bccf70c 817 res = thread_block(THREAD_CONTINUE_NULL);
9bccf70c
A
818 mutex_lock(mutex);
819 }
820 return res;
821}
822
823/*
824 * thread_sleep_lock_write:
825 *
826 * Cause the current thread to wait until the specified event
827 * occurs. The specified (write) lock is unlocked before releasing
828 * the cpu. The (write) lock will be re-acquired before returning.
9bccf70c
A
829 */
830wait_result_t
831thread_sleep_lock_write(
832 event_t event,
833 lock_t *lock,
834 wait_interrupt_t interruptible)
835{
836 wait_result_t res;
837
838 res = assert_wait(event, interruptible);
839 if (res == THREAD_WAITING) {
840 lock_write_done(lock);
841 res = thread_block(THREAD_CONTINUE_NULL);
842 lock_write(lock);
843 }
844 return res;
845}
846
1c79356b 847/*
91447636 848 * thread_stop:
1c79356b 849 *
91447636
A
850 * Force a preemption point for a thread and wait
851 * for it to stop running. Arbitrates access among
852 * multiple stop requests. (released by unstop)
1c79356b 853 *
91447636
A
854 * The thread must enter a wait state and stop via a
855 * separate means.
1c79356b 856 *
91447636 857 * Returns FALSE if interrupted.
1c79356b
A
858 */
859boolean_t
860thread_stop(
91447636 861 thread_t thread)
1c79356b 862{
91447636 863 wait_result_t wresult;
2d21ac55 864 spl_t s = splsched();
1c79356b 865
1c79356b 866 wake_lock(thread);
2d21ac55 867 thread_lock(thread);
1c79356b
A
868
869 while (thread->state & TH_SUSP) {
870 thread->wake_active = TRUE;
2d21ac55
A
871 thread_unlock(thread);
872
91447636 873 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
1c79356b
A
874 wake_unlock(thread);
875 splx(s);
876
91447636
A
877 if (wresult == THREAD_WAITING)
878 wresult = thread_block(THREAD_CONTINUE_NULL);
9bccf70c 879
91447636 880 if (wresult != THREAD_AWAKENED)
1c79356b
A
881 return (FALSE);
882
883 s = splsched();
884 wake_lock(thread);
2d21ac55 885 thread_lock(thread);
1c79356b 886 }
9bccf70c 887
1c79356b 888 thread->state |= TH_SUSP;
1c79356b 889
9bccf70c 890 while (thread->state & TH_RUN) {
9bccf70c
A
891 processor_t processor = thread->last_processor;
892
2d21ac55 893 if (processor != PROCESSOR_NULL && processor->active_thread == thread)
9bccf70c 894 cause_ast_check(processor);
9bccf70c
A
895
896 thread->wake_active = TRUE;
2d21ac55
A
897 thread_unlock(thread);
898
91447636 899 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
9bccf70c
A
900 wake_unlock(thread);
901 splx(s);
902
91447636
A
903 if (wresult == THREAD_WAITING)
904 wresult = thread_block(THREAD_CONTINUE_NULL);
9bccf70c 905
91447636 906 if (wresult != THREAD_AWAKENED) {
9bccf70c
A
907 thread_unstop(thread);
908 return (FALSE);
909 }
910
911 s = splsched();
912 wake_lock(thread);
913 thread_lock(thread);
914 }
915
916 thread_unlock(thread);
1c79356b
A
917 wake_unlock(thread);
918 splx(s);
919
920 return (TRUE);
921}
922
923/*
91447636
A
924 * thread_unstop:
925 *
926 * Release a previous stop request and set
927 * the thread running if appropriate.
928 *
929 * Use only after a successful stop operation.
1c79356b
A
930 */
931void
932thread_unstop(
9bccf70c 933 thread_t thread)
1c79356b 934{
9bccf70c 935 spl_t s = splsched();
1c79356b 936
1c79356b
A
937 wake_lock(thread);
938 thread_lock(thread);
939
9bccf70c 940 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
0b4e3aa0 941 thread->state &= ~TH_SUSP;
91447636 942 thread_unblock(thread, THREAD_AWAKENED);
55e303ae
A
943
944 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1c79356b
A
945 }
946 else
947 if (thread->state & TH_SUSP) {
948 thread->state &= ~TH_SUSP;
949
950 if (thread->wake_active) {
951 thread->wake_active = FALSE;
952 thread_unlock(thread);
2d21ac55
A
953
954 thread_wakeup(&thread->wake_active);
1c79356b
A
955 wake_unlock(thread);
956 splx(s);
1c79356b
A
957
958 return;
959 }
960 }
961
962 thread_unlock(thread);
963 wake_unlock(thread);
964 splx(s);
965}
966
967/*
91447636
A
968 * thread_wait:
969 *
970 * Wait for a thread to stop running. (non-interruptible)
971 *
1c79356b 972 */
91447636 973void
1c79356b 974thread_wait(
91447636 975 thread_t thread)
1c79356b 976{
91447636
A
977 wait_result_t wresult;
978 spl_t s = splsched();
1c79356b 979
1c79356b 980 wake_lock(thread);
9bccf70c 981 thread_lock(thread);
1c79356b 982
9bccf70c 983 while (thread->state & TH_RUN) {
9bccf70c 984 processor_t processor = thread->last_processor;
e7c99d92 985
2d21ac55 986 if (processor != PROCESSOR_NULL && processor->active_thread == thread)
9bccf70c 987 cause_ast_check(processor);
1c79356b
A
988
989 thread->wake_active = TRUE;
2d21ac55
A
990 thread_unlock(thread);
991
91447636 992 wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
1c79356b
A
993 wake_unlock(thread);
994 splx(s);
995
91447636
A
996 if (wresult == THREAD_WAITING)
997 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
998
999 s = splsched();
1000 wake_lock(thread);
9bccf70c 1001 thread_lock(thread);
1c79356b 1002 }
0b4e3aa0 1003
9bccf70c 1004 thread_unlock(thread);
1c79356b
A
1005 wake_unlock(thread);
1006 splx(s);
1c79356b
A
1007}
1008
1c79356b
A
1009/*
1010 * Routine: clear_wait_internal
1011 *
1012 * Clear the wait condition for the specified thread.
1013 * Start the thread executing if that is appropriate.
1014 * Arguments:
1015 * thread thread to awaken
1016 * result Wakeup result the thread should see
1017 * Conditions:
1018 * At splsched
1019 * the thread is locked.
9bccf70c
A
1020 * Returns:
1021 * KERN_SUCCESS thread was rousted out a wait
1022 * KERN_FAILURE thread was waiting but could not be rousted
1023 * KERN_NOT_WAITING thread was not waiting
1c79356b 1024 */
9bccf70c 1025__private_extern__ kern_return_t
1c79356b 1026clear_wait_internal(
9bccf70c 1027 thread_t thread,
55e303ae 1028 wait_result_t wresult)
1c79356b 1029{
9bccf70c 1030 wait_queue_t wq = thread->wait_queue;
55e303ae 1031 int i = LockTimeOut;
9bccf70c 1032
9bccf70c 1033 do {
55e303ae
A
1034 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1035 return (KERN_FAILURE);
9bccf70c
A
1036
1037 if (wq != WAIT_QUEUE_NULL) {
1038 if (wait_queue_lock_try(wq)) {
1039 wait_queue_pull_thread_locked(wq, thread, TRUE);
1040 /* wait queue unlocked, thread still locked */
55e303ae
A
1041 }
1042 else {
9bccf70c
A
1043 thread_unlock(thread);
1044 delay(1);
55e303ae 1045
9bccf70c 1046 thread_lock(thread);
55e303ae
A
1047 if (wq != thread->wait_queue)
1048 return (KERN_NOT_WAITING);
9bccf70c 1049
9bccf70c
A
1050 continue;
1051 }
1c79356b 1052 }
55e303ae 1053
91447636 1054 return (thread_go(thread, wresult));
55e303ae
A
1055 } while (--i > 0);
1056
2d21ac55 1057 panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
9bccf70c 1058 thread, wq, cpu_number());
55e303ae
A
1059
1060 return (KERN_FAILURE);
1c79356b
A
1061}
1062
1063
1064/*
1065 * clear_wait:
1066 *
1067 * Clear the wait condition for the specified thread. Start the thread
1068 * executing if that is appropriate.
1069 *
1070 * parameters:
1071 * thread thread to awaken
1072 * result Wakeup result the thread should see
1073 */
9bccf70c 1074kern_return_t
1c79356b 1075clear_wait(
9bccf70c
A
1076 thread_t thread,
1077 wait_result_t result)
1c79356b 1078{
9bccf70c 1079 kern_return_t ret;
1c79356b
A
1080 spl_t s;
1081
1082 s = splsched();
1083 thread_lock(thread);
9bccf70c 1084 ret = clear_wait_internal(thread, result);
1c79356b
A
1085 thread_unlock(thread);
1086 splx(s);
9bccf70c 1087 return ret;
1c79356b
A
1088}
1089
1090
1091/*
1092 * thread_wakeup_prim:
1093 *
1094 * Common routine for thread_wakeup, thread_wakeup_with_result,
1095 * and thread_wakeup_one.
1096 *
1097 */
9bccf70c 1098kern_return_t
1c79356b
A
1099thread_wakeup_prim(
1100 event_t event,
1101 boolean_t one_thread,
9bccf70c 1102 wait_result_t result)
1c79356b
A
1103{
1104 register wait_queue_t wq;
1105 register int index;
1106
1107 index = wait_hash(event);
1108 wq = &wait_queues[index];
1109 if (one_thread)
9bccf70c 1110 return (wait_queue_wakeup_one(wq, event, result));
1c79356b 1111 else
9bccf70c 1112 return (wait_queue_wakeup_all(wq, event, result));
1c79356b
A
1113}
1114
1115/*
1116 * thread_bind:
1117 *
2d21ac55 1118 * Force the current thread to execute on the specified processor.
1c79356b 1119 *
55e303ae
A
1120 * Returns the previous binding. PROCESSOR_NULL means
1121 * not bound.
1122 *
1123 * XXX - DO NOT export this to users - XXX
1c79356b 1124 */
55e303ae 1125processor_t
1c79356b 1126thread_bind(
2d21ac55 1127 processor_t processor)
1c79356b 1128{
2d21ac55 1129 thread_t self = current_thread();
55e303ae 1130 processor_t prev;
55e303ae 1131 spl_t s;
1c79356b
A
1132
1133 s = splsched();
2d21ac55 1134 thread_lock(self);
55e303ae 1135
2d21ac55
A
1136 prev = self->bound_processor;
1137 self->bound_processor = processor;
55e303ae 1138
2d21ac55 1139 thread_unlock(self);
1c79356b 1140 splx(s);
55e303ae
A
1141
1142 return (prev);
1c79356b
A
1143}
1144
1145/*
2d21ac55
A
1146 * thread_select:
1147 *
1148 * Select a new thread for the current processor to execute.
55e303ae
A
1149 *
1150 * May select the current thread, which must be locked.
1c79356b 1151 */
2d21ac55 1152static thread_t
1c79356b 1153thread_select(
2d21ac55
A
1154 thread_t thread,
1155 processor_t processor)
1c79356b 1156{
2d21ac55
A
1157 processor_set_t pset = processor->processor_set;
1158 thread_t new_thread;
1159 boolean_t other_runnable;
1c79356b 1160
2d21ac55
A
1161 do {
1162 /*
1163 * Update the priority.
1164 */
1165 if (thread->sched_stamp != sched_tick)
1166 update_priority(thread);
0b4e3aa0 1167
2d21ac55 1168 processor->current_pri = thread->sched_pri;
1c79356b 1169
2d21ac55
A
1170 pset_lock(pset);
1171
1172 simple_lock(&rt_lock);
1173
1174 /*
1175 * Check for other runnable threads.
1176 */
1177 other_runnable = processor->runq.count > 0 || rt_runq.count > 0;
9bccf70c 1178
2d21ac55
A
1179 /*
1180 * Test to see if the current thread should continue
1181 * to run on this processor. Must be runnable, and not
1182 * bound to a different processor, nor be in the wrong
1183 * processor set.
1184 */
1185 if ( thread->state == TH_RUN &&
1186 (thread->bound_processor == PROCESSOR_NULL ||
1187 thread->bound_processor == processor) &&
1188 (thread->affinity_set == AFFINITY_SET_NULL ||
1189 thread->affinity_set->aset_pset == pset) ) {
1190 if ( thread->sched_pri >= BASEPRI_RTQUEUES &&
1191 first_timeslice(processor) ) {
1192 if (rt_runq.highq >= BASEPRI_RTQUEUES) {
1193 register run_queue_t runq = &rt_runq;
1194 register queue_t q;
1195
1196 q = runq->queues + runq->highq;
1197 if (((thread_t)q->next)->realtime.deadline <
1198 processor->deadline) {
1199 thread = (thread_t)q->next;
1200 ((queue_entry_t)thread)->next->prev = q;
1201 q->next = ((queue_entry_t)thread)->next;
1202 thread->runq = PROCESSOR_NULL;
1203 assert(thread->sched_mode & TH_MODE_PREEMPT);
1204 runq->count--; runq->urgency--;
1205 if (queue_empty(q)) {
1206 if (runq->highq != IDLEPRI)
1207 clrbit(MAXPRI - runq->highq, runq->bitmap);
1208 runq->highq = MAXPRI - ffsbit(runq->bitmap);
1209 }
55e303ae
A
1210 }
1211 }
2d21ac55
A
1212
1213 simple_unlock(&rt_lock);
1214
1215 processor->deadline = thread->realtime.deadline;
1216
1217 pset_unlock(pset);
1218
1219 return (thread);
55e303ae
A
1220 }
1221
2d21ac55
A
1222 if ( (!other_runnable ||
1223 (processor->runq.highq < thread->sched_pri &&
1224 rt_runq.highq < thread->sched_pri)) ) {
55e303ae 1225
2d21ac55 1226 simple_unlock(&rt_lock);
55e303ae 1227
2d21ac55 1228 /* I am the highest priority runnable (non-idle) thread */
1c79356b 1229
2d21ac55
A
1230 pset_hint_low(pset, processor);
1231 pset_hint_high(pset, processor);
1c79356b 1232
2d21ac55 1233 processor->deadline = UINT64_MAX;
55e303ae 1234
2d21ac55 1235 pset_unlock(pset);
55e303ae 1236
2d21ac55
A
1237 return (thread);
1238 }
1239 }
1240
1241 if (other_runnable)
1242 return choose_thread(processor);
1243
1244 simple_unlock(&rt_lock);
55e303ae 1245
2d21ac55
A
1246 /*
1247 * No runnable threads, attempt to steal
1248 * from other processors.
1249 */
1250 if (pset->high_hint != PROCESSOR_NULL && pset->high_hint->runq.count > 0) {
1251 new_thread = steal_thread(pset->high_hint);
1252 if (new_thread != THREAD_NULL) {
1253 pset_unlock(pset);
1254
1255 return (new_thread);
1256 }
55e303ae 1257 }
55e303ae 1258
1c79356b
A
1259 /*
1260 * Nothing is runnable, so set this processor idle if it
2d21ac55 1261 * was running.
1c79356b 1262 */
55e303ae
A
1263 if (processor->state == PROCESSOR_RUNNING) {
1264 remqueue(&pset->active_queue, (queue_entry_t)processor);
1265 processor->state = PROCESSOR_IDLE;
1c79356b 1266
2d21ac55
A
1267 enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
1268 pset->low_hint = processor;
1c79356b
A
1269 pset->idle_count++;
1270 }
1c79356b 1271
55e303ae
A
1272 processor->deadline = UINT64_MAX;
1273
2d21ac55
A
1274 pset_unlock(pset);
1275
1276 /*
1277 * Choose idle thread if fast idle is not possible.
1278 */
1279 if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active)
1280 return (processor->idle_thread);
1281
1282 /*
1283 * Perform idling activities directly without a
1284 * context switch. Return dispatched thread,
1285 * else check again for a runnable thread.
1286 */
1287 new_thread = thread_select_idle(thread, processor);
1288
1289 } while (new_thread == THREAD_NULL);
1290
1291 return (new_thread);
1292}
1293
1294/*
1295 * thread_select_idle:
1296 *
1297 * Idle the processor using the current thread context.
1298 *
1299 * Called with thread locked, then dropped and relocked.
1300 */
1301static thread_t
1302thread_select_idle(
1303 thread_t thread,
1304 processor_t processor)
1305{
1306 thread_t new_thread;
1307
1308 if (thread->sched_mode & TH_MODE_TIMESHARE)
1309 sched_share_decr();
1310 sched_run_decr();
1311
1312 thread->state |= TH_IDLE;
1313 processor->current_pri = IDLEPRI;
1314
1315 thread_unlock(thread);
1316
1317 /*
1318 * Switch execution timing to processor idle thread.
1319 */
1320 processor->last_dispatch = mach_absolute_time();
1321 thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
1322 PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;
1323
1324 /*
1325 * Cancel the quantum timer while idling.
1326 */
1327 timer_call_cancel(&processor->quantum_timer);
1328 processor->timeslice = 0;
1329
1330 (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
1331
1332 /*
1333 * Enable interrupts and perform idling activities. No
1334 * preemption due to TH_IDLE being set.
1335 */
1336 spllo(); new_thread = processor_idle(thread, processor);
1337
1338 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
1339
1340 thread_lock(thread);
1341
1342 /*
1343 * If awakened, switch to thread timer and start a new quantum.
1344 * Otherwise skip; we will context switch to another thread or return here.
1345 */
1346 if (!(thread->state & TH_WAIT)) {
1347 processor->last_dispatch = mach_absolute_time();
1348 thread_timer_event(processor->last_dispatch, &thread->system_timer);
1349 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
1350
1351 thread_quantum_init(thread);
1352
1353 processor->quantum_end = processor->last_dispatch + thread->current_quantum;
1354 timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end);
1355 processor->timeslice = 1;
1356
1357 thread->computation_epoch = processor->last_dispatch;
1c79356b
A
1358 }
1359
2d21ac55 1360 thread->state &= ~TH_IDLE;
55e303ae 1361
2d21ac55
A
1362 sched_run_incr();
1363 if (thread->sched_mode & TH_MODE_TIMESHARE)
1364 sched_share_incr();
1365
1366 return (new_thread);
1c79356b
A
1367}
1368
1c79356b 1369/*
55e303ae
A
1370 * Perform a context switch and start executing the new thread.
1371 *
91447636 1372 * Returns FALSE on failure, and the thread is re-dispatched.
9bccf70c 1373 *
55e303ae 1374 * Called at splsched.
1c79356b
A
1375 */
1376
55e303ae
A
1377#define funnel_release_check(thread, debug) \
1378MACRO_BEGIN \
1379 if ((thread)->funnel_state & TH_FN_OWNED) { \
1380 (thread)->funnel_state = TH_FN_REFUNNEL; \
1381 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
1382 (thread)->funnel_lock, (debug), 0, 0, 0); \
1383 funnel_unlock((thread)->funnel_lock); \
1384 } \
1385MACRO_END
1386
1387#define funnel_refunnel_check(thread, debug) \
1388MACRO_BEGIN \
1389 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
1390 kern_return_t result = (thread)->wait_result; \
1391 \
1392 (thread)->funnel_state = 0; \
1393 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
1394 (thread)->funnel_lock, (debug), 0, 0, 0); \
1395 funnel_lock((thread)->funnel_lock); \
1396 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
1397 (thread)->funnel_lock, (debug), 0, 0, 0); \
1398 (thread)->funnel_state = TH_FN_OWNED; \
1399 (thread)->wait_result = result; \
1400 } \
1401MACRO_END
1402
2d21ac55 1403static boolean_t
1c79356b 1404thread_invoke(
2d21ac55
A
1405 register thread_t self,
1406 register thread_t thread,
91447636 1407 ast_t reason)
1c79356b 1408{
2d21ac55
A
1409 thread_continue_t continuation = self->continuation;
1410 void *parameter = self->parameter;
9bccf70c 1411 processor_t processor;
1c79356b 1412
9bccf70c 1413 if (get_preemption_level() != 0)
0b4e3aa0 1414 panic("thread_invoke: preemption_level %d\n",
2d21ac55 1415 get_preemption_level());
0b4e3aa0 1416
2d21ac55 1417 assert(self == current_thread());
91447636 1418
1c79356b 1419 /*
9bccf70c 1420 * Mark thread interruptible.
1c79356b 1421 */
2d21ac55
A
1422 thread_lock(thread);
1423 thread->state &= ~TH_UNINT;
1c79356b 1424
2d21ac55
A
1425#if DEBUG
1426 assert(thread_runnable(thread));
1427#endif
1c79356b 1428
9bccf70c
A
1429 /*
1430 * Allow time constraint threads to hang onto
1431 * a stack.
1432 */
2d21ac55
A
1433 if ((self->sched_mode & TH_MODE_REALTIME) && !self->reserved_stack)
1434 self->reserved_stack = self->kernel_stack;
1c79356b 1435
91447636 1436 if (continuation != NULL) {
2d21ac55 1437 if (!thread->kernel_stack) {
9bccf70c 1438 /*
2d21ac55 1439 * If we are using a privileged stack,
9bccf70c 1440 * check to see whether we can exchange it with
2d21ac55 1441 * that of the other thread.
9bccf70c 1442 */
2d21ac55 1443 if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack)
9bccf70c 1444 goto need_stack;
1c79356b 1445
91447636
A
1446 /*
1447 * Context switch by performing a stack handoff.
1448 */
2d21ac55
A
1449 continuation = thread->continuation;
1450 parameter = thread->parameter;
1c79356b 1451
9bccf70c 1452 processor = current_processor();
2d21ac55
A
1453 processor->active_thread = thread;
1454 processor->current_pri = thread->sched_pri;
1455 if (thread->last_processor != processor && thread->last_processor != NULL) {
1456 if (thread->last_processor->processor_set != processor->processor_set)
1457 thread->ps_switch++;
1458 thread->p_switch++;
1459 }
1460 thread->last_processor = processor;
1461 thread->c_switch++;
1462 ast_context(thread);
1463 thread_unlock(thread);
1c79356b 1464
2d21ac55 1465 self->reason = reason;
91447636
A
1466
1467 processor->last_dispatch = mach_absolute_time();
2d21ac55
A
1468 thread_timer_event(processor->last_dispatch, &thread->system_timer);
1469 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
1470
1471 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
1472 self->reason, (int)thread, self->sched_pri, thread->sched_pri, 0);
1c79356b 1473
2d21ac55
A
1474TLOG(1, "thread_invoke: calling machine_stack_handoff\n");
1475 machine_stack_handoff(self, thread);
9bccf70c 1476
2d21ac55 1477 thread_dispatch(self, thread);
1c79356b 1478
2d21ac55 1479 thread->continuation = thread->parameter = NULL;
1c79356b 1480
2d21ac55 1481 counter(c_thread_invoke_hits++);
1c79356b 1482
2d21ac55 1483 funnel_refunnel_check(thread, 2);
9bccf70c 1484 (void) spllo();
1c79356b 1485
2d21ac55
A
1486 assert(continuation);
1487 call_continuation(continuation, parameter, thread->wait_result);
9bccf70c 1488 /*NOTREACHED*/
9bccf70c 1489 }
2d21ac55 1490 else if (thread == self) {
9bccf70c 1491 /* same thread but with continuation */
2d21ac55 1492 ast_context(self);
9bccf70c 1493 counter(++c_thread_invoke_same);
2d21ac55 1494 thread_unlock(self);
9bccf70c 1495
2d21ac55
A
1496 self->continuation = self->parameter = NULL;
1497
1498 funnel_refunnel_check(self, 3);
9bccf70c 1499 (void) spllo();
55e303ae 1500
2d21ac55 1501 call_continuation(continuation, parameter, self->wait_result);
9bccf70c
A
1502 /*NOTREACHED*/
1503 }
1c79356b 1504 }
9bccf70c
A
1505 else {
1506 /*
2d21ac55 1507 * Check that the other thread has a stack
9bccf70c 1508 */
2d21ac55 1509 if (!thread->kernel_stack) {
9bccf70c 1510need_stack:
2d21ac55
A
1511 if (!stack_alloc_try(thread)) {
1512 counter(c_thread_invoke_misses++);
1513 thread_unlock(thread);
1514 thread_stack_enqueue(thread);
9bccf70c
A
1515 return (FALSE);
1516 }
9bccf70c 1517 }
2d21ac55
A
1518 else if (thread == self) {
1519 ast_context(self);
9bccf70c 1520 counter(++c_thread_invoke_same);
2d21ac55 1521 thread_unlock(self);
9bccf70c
A
1522 return (TRUE);
1523 }
1524 }
1c79356b
A
1525
1526 /*
91447636 1527 * Context switch by full context save.
1c79356b 1528 */
9bccf70c 1529 processor = current_processor();
2d21ac55
A
1530 processor->active_thread = thread;
1531 processor->current_pri = thread->sched_pri;
1532 if (thread->last_processor != processor && thread->last_processor != NULL) {
1533 if (thread->last_processor->processor_set != processor->processor_set)
1534 thread->ps_switch++;
1535 thread->p_switch++;
1536 }
1537 thread->last_processor = processor;
1538 thread->c_switch++;
1539 ast_context(thread);
1540 thread_unlock(thread);
1c79356b 1541
2d21ac55 1542 counter(c_thread_invoke_csw++);
1c79356b 1543
2d21ac55
A
1544 assert(self->runq == PROCESSOR_NULL);
1545 self->reason = reason;
1c79356b 1546
91447636 1547 processor->last_dispatch = mach_absolute_time();
2d21ac55
A
1548 thread_timer_event(processor->last_dispatch, &thread->system_timer);
1549 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
91447636 1550
2d21ac55
A
1551 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
1552 (int)self->reason, (int)thread, self->sched_pri, thread->sched_pri, 0);
1c79356b
A
1553
1554 /*
91447636 1555 * This is where we actually switch register context,
2d21ac55
A
1556 * and address space if required. We will next run
1557 * as a result of a subsequent context switch.
91447636 1558 */
2d21ac55
A
1559 thread = machine_switch_context(self, continuation, thread);
1560TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread);
1c79356b
A
1561
1562 /*
2d21ac55 1563 * We have been resumed and are set to run.
1c79356b 1564 */
2d21ac55 1565 thread_dispatch(thread, self);
9bccf70c 1566
91447636 1567 if (continuation) {
2d21ac55
A
1568 self->continuation = self->parameter = NULL;
1569
1570 funnel_refunnel_check(self, 3);
9bccf70c 1571 (void) spllo();
55e303ae 1572
2d21ac55 1573 call_continuation(continuation, parameter, self->wait_result);
9bccf70c 1574 /*NOTREACHED*/
1c79356b
A
1575 }
1576
9bccf70c 1577 return (TRUE);
1c79356b
A
1578}
1579
1580/*
2d21ac55 1581 * thread_dispatch:
1c79356b 1582 *
2d21ac55
A
1583 * Handle threads at context switch. Re-dispatch other thread
1584 * if still running, otherwise update run state and perform
1585 * special actions. Update quantum for other thread and begin
1586 * the quantum for ourselves.
91447636
A
1587 *
1588 * Called at splsched.
1c79356b
A
1589 */
1590void
2d21ac55
A
1591thread_dispatch(
1592 thread_t thread,
1593 thread_t self)
1c79356b 1594{
2d21ac55
A
1595 processor_t processor = self->last_processor;
1596
1597 if (thread != THREAD_NULL) {
91447636 1598 /*
2d21ac55
A
1599 * If blocked at a continuation, discard
1600 * the stack.
91447636 1601 */
2d21ac55
A
1602 if (thread->continuation != NULL && thread->kernel_stack != 0)
1603 stack_free(thread);
1604
1605 if (!(thread->state & TH_IDLE)) {
1606 wake_lock(thread);
1607 thread_lock(thread);
9bccf70c 1608
91447636 1609 /*
2d21ac55 1610 * Compute remainder of current quantum.
91447636 1611 */
2d21ac55
A
1612 if ( first_timeslice(processor) &&
1613 processor->quantum_end > processor->last_dispatch )
1614 thread->current_quantum = (processor->quantum_end - processor->last_dispatch);
1615 else
1616 thread->current_quantum = 0;
1617
1618 if (thread->sched_mode & TH_MODE_REALTIME) {
1619 /*
1620 * Cancel the deadline if the thread has
1621 * consumed the entire quantum.
1622 */
1623 if (thread->current_quantum == 0) {
1624 thread->realtime.deadline = UINT64_MAX;
1625 thread->reason |= AST_QUANTUM;
1626 }
91447636 1627 }
2d21ac55
A
1628 else {
1629 /*
1630 * For non-realtime threads treat a tiny
1631 * remaining quantum as an expired quantum
1632 * but include what's left next time.
1633 */
1634 if (thread->current_quantum < min_std_quantum) {
1635 thread->reason |= AST_QUANTUM;
1636 thread->current_quantum += std_quantum;
1637 }
1638 }
1639
91447636 1640 /*
2d21ac55
A
1641 * If we are doing a direct handoff then
1642 * take the remainder of the quantum.
91447636 1643 */
2d21ac55
A
1644 if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
1645 self->current_quantum = thread->current_quantum;
1646 thread->reason |= AST_QUANTUM;
1647 thread->current_quantum = 0;
91447636 1648 }
91447636 1649
2d21ac55
A
1650 thread->last_switch = processor->last_dispatch;
1651
1652 thread->computation_metered += (thread->last_switch - thread->computation_epoch);
1653
1654 if (!(thread->state & TH_WAIT)) {
1655 /*
1656 * Still running.
1657 */
1658 if (thread->reason & AST_QUANTUM)
1659 thread_setrun(thread, SCHED_TAILQ);
1660 else
1661 if (thread->reason & AST_PREEMPT)
1662 thread_setrun(thread, SCHED_HEADQ);
1663 else
1664 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1665
1666 thread->reason = AST_NONE;
1667
1668 thread_unlock(thread);
1669 wake_unlock(thread);
1670 }
1671 else {
1672 /*
1673 * Waiting.
1674 */
1675 thread->state &= ~TH_RUN;
1676
1677 if (thread->sched_mode & TH_MODE_TIMESHARE)
1678 sched_share_decr();
1679 sched_run_decr();
1680
1681 if (thread->wake_active) {
1682 thread->wake_active = FALSE;
1683 thread_unlock(thread);
1684
1685 thread_wakeup(&thread->wake_active);
1686 }
1687 else
1688 thread_unlock(thread);
91447636 1689
2d21ac55 1690 wake_unlock(thread);
91447636 1691
2d21ac55
A
1692 (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
1693
1694 if (thread->state & TH_TERMINATE)
1695 thread_terminate_enqueue(thread);
1696 }
1697 }
91447636 1698 }
91447636 1699
2d21ac55 1700 if (!(self->state & TH_IDLE)) {
91447636 1701 /*
2d21ac55 1702 * Get a new quantum if none remaining.
91447636 1703 */
2d21ac55
A
1704 if (self->current_quantum == 0)
1705 thread_quantum_init(self);
91447636
A
1706
1707 /*
2d21ac55 1708 * Set up quantum timer and timeslice.
91447636 1709 */
2d21ac55
A
1710 processor->quantum_end = (processor->last_dispatch + self->current_quantum);
1711 timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end);
91447636 1712
2d21ac55 1713 processor->timeslice = 1;
91447636 1714
2d21ac55 1715 self->last_switch = processor->last_dispatch;
91447636 1716
2d21ac55 1717 self->computation_epoch = self->last_switch;
91447636
A
1718 }
1719 else {
1720 timer_call_cancel(&processor->quantum_timer);
2d21ac55 1721 processor->timeslice = 0;
91447636
A
1722 }
1723}
1724
1725/*
2d21ac55 1726 * thread_block_reason:
91447636 1727 *
2d21ac55
A
1728 * Forces a reschedule, blocking the caller if a wait
1729 * has been asserted.
91447636 1730 *
2d21ac55
A
1731 * If a continuation is specified, then thread_invoke will
1732 * attempt to discard the thread's kernel stack. When the
1733 * thread resumes, it will execute the continuation function
1734 * on a new kernel stack.
91447636 1735 */
2d21ac55
A
1736counter(mach_counter_t c_thread_block_calls = 0;)
1737
1738wait_result_t
1739thread_block_reason(
1740 thread_continue_t continuation,
1741 void *parameter,
1742 ast_t reason)
91447636 1743{
2d21ac55
A
1744 register thread_t self = current_thread();
1745 register processor_t processor;
1746 register thread_t new_thread;
1747 spl_t s;
1c79356b
A
1748
1749 counter(++c_thread_block_calls);
1750
1c79356b
A
1751 s = splsched();
1752
55e303ae 1753 if (!(reason & AST_PREEMPT))
91447636 1754 funnel_release_check(self, 2);
1c79356b 1755
55e303ae 1756 processor = current_processor();
1c79356b 1757
9bccf70c
A
1758 /* If we're explicitly yielding, force a subsequent quantum */
1759 if (reason & AST_YIELD)
55e303ae 1760 processor->timeslice = 0;
0b4e3aa0 1761
9bccf70c
A
1762 /* We're handling all scheduling AST's */
1763 ast_off(AST_SCHEDULING);
1c79356b 1764
91447636
A
1765 self->continuation = continuation;
1766 self->parameter = parameter;
1767
2d21ac55 1768 do {
91447636 1769 thread_lock(self);
2d21ac55 1770 new_thread = thread_select(self, processor);
91447636 1771 thread_unlock(self);
2d21ac55 1772 } while (!thread_invoke(self, new_thread, reason));
1c79356b 1773
91447636 1774 funnel_refunnel_check(self, 5);
1c79356b
A
1775 splx(s);
1776
91447636 1777 return (self->wait_result);
1c79356b
A
1778}
1779
1780/*
1781 * thread_block:
1782 *
9bccf70c 1783 * Block the current thread if a wait has been asserted.
1c79356b 1784 */
91447636 1785wait_result_t
1c79356b 1786thread_block(
9bccf70c 1787 thread_continue_t continuation)
1c79356b 1788{
91447636
A
1789 return thread_block_reason(continuation, NULL, AST_NONE);
1790}
1791
1792wait_result_t
1793thread_block_parameter(
1794 thread_continue_t continuation,
1795 void *parameter)
1796{
1797 return thread_block_reason(continuation, parameter, AST_NONE);
1c79356b
A
1798}
1799
1800/*
1801 * thread_run:
1802 *
91447636 1803 * Switch directly from the current thread to the
55e303ae 1804 * new thread, handing off our quantum if appropriate.
9bccf70c
A
1805 *
1806 * New thread must be runnable, and not on a run queue.
1c79356b 1807 *
55e303ae 1808 * Called at splsched.
1c79356b
A
1809 */
1810int
1811thread_run(
91447636 1812 thread_t self,
9bccf70c 1813 thread_continue_t continuation,
91447636 1814 void *parameter,
9bccf70c 1815 thread_t new_thread)
1c79356b 1816{
9bccf70c
A
1817 ast_t handoff = AST_HANDOFF;
1818
91447636 1819 funnel_release_check(self, 3);
9bccf70c 1820
91447636
A
1821 self->continuation = continuation;
1822 self->parameter = parameter;
9bccf70c 1823
91447636 1824 while (!thread_invoke(self, new_thread, handoff)) {
2d21ac55 1825 processor_t processor = current_processor();
9bccf70c 1826
91447636 1827 thread_lock(self);
2d21ac55 1828 new_thread = thread_select(self, processor);
91447636 1829 thread_unlock(self);
9bccf70c
A
1830 handoff = AST_NONE;
1831 }
1832
91447636 1833 funnel_refunnel_check(self, 6);
9bccf70c 1834
91447636 1835 return (self->wait_result);
1c79356b
A
1836}
1837
1838/*
91447636 1839 * thread_continue:
55e303ae 1840 *
91447636
A
1841 * Called at splsched when a thread first receives
1842 * a new stack after a continuation.
1c79356b
A
1843 */
1844void
91447636 1845thread_continue(
2d21ac55 1846 register thread_t thread)
1c79356b 1847{
91447636
A
1848 register thread_t self = current_thread();
1849 register thread_continue_t continuation;
1850 register void *parameter;
1c79356b 1851
91447636 1852 continuation = self->continuation;
91447636 1853 parameter = self->parameter;
9bccf70c 1854
2d21ac55 1855 thread_dispatch(thread, self);
9bccf70c 1856
2d21ac55 1857 self->continuation = self->parameter = NULL;
1c79356b 1858
91447636 1859 funnel_refunnel_check(self, 4);
1c79356b 1860
2d21ac55 1861 if (thread != THREAD_NULL)
91447636 1862 (void)spllo();
9bccf70c 1863
2d21ac55 1864 TLOG(1, "thread_continue: calling call_continuation \n");
91447636
A
1865 call_continuation(continuation, parameter, self->wait_result);
1866 /*NOTREACHED*/
1c79356b
A
1867}
1868
1869/*
2d21ac55 1870 * run_queue_init:
55e303ae 1871 *
2d21ac55 1872 * Initialize a run queue before first use.
1c79356b 1873 */
2d21ac55
A
1874void
1875run_queue_init(
1876 run_queue_t rq)
1877{
1878 int i;
1879
1880 rq->highq = IDLEPRI;
1881 for (i = 0; i < NRQBM; i++)
1882 rq->bitmap[i] = 0;
1883 setbit(MAXPRI - IDLEPRI, rq->bitmap);
1884 rq->urgency = rq->count = 0;
1885 for (i = 0; i < NRQS; i++)
1886 queue_init(&rq->queues[i]);
1887}
1c79356b 1888
2d21ac55
A
1889/*
1890 * run_queue_dequeue:
1891 *
1892 * Perform a dequeue operation on a run queue,
1893 * and return the resulting thread.
1894 *
1895 * The run queue must be locked (see run_queue_remove()
1896 * for more info), and not empty.
1897 */
1898static thread_t
1899run_queue_dequeue(
1900 run_queue_t rq,
1901 integer_t options)
1902{
1903 thread_t thread;
1904 queue_t queue = rq->queues + rq->highq;
9bccf70c 1905
2d21ac55
A
1906 if (options & SCHED_HEADQ) {
1907 thread = (thread_t)queue->next;
1908 ((queue_entry_t)thread)->next->prev = queue;
1909 queue->next = ((queue_entry_t)thread)->next;
1910 }
1911 else {
1912 thread = (thread_t)queue->prev;
1913 ((queue_entry_t)thread)->prev->next = queue;
1914 queue->prev = ((queue_entry_t)thread)->prev;
9bccf70c 1915 }
1c79356b 1916
2d21ac55
A
1917 thread->runq = PROCESSOR_NULL;
1918 rq->count--;
9bccf70c 1919 if (thread->sched_mode & TH_MODE_PREEMPT)
2d21ac55
A
1920 rq->urgency--;
1921 if (queue_empty(queue)) {
1922 if (rq->highq != IDLEPRI)
1923 clrbit(MAXPRI - rq->highq, rq->bitmap);
1924 rq->highq = MAXPRI - ffsbit(rq->bitmap);
1925 }
1c79356b 1926
2d21ac55 1927 return (thread);
1c79356b
A
1928}
1929
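/*
 *	Usage sketch: choose_thread() below removes the thread at the
 *	head of the highest occupied priority level with
 *
 *		thread = run_queue_dequeue(&processor->runq, SCHED_HEADQ);
 *
 *	passing SCHED_TAILQ instead takes the thread at the tail of
 *	that level.
 */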
1930/*
2d21ac55
A
1931 * realtime_queue_insert:
1932 *
1933 * Enqueue a thread for realtime execution.
1c79356b 1934 */
2d21ac55
A
1935static boolean_t
1936realtime_queue_insert(
1937 thread_t thread)
1c79356b 1938{
2d21ac55
A
1939 run_queue_t rq = &rt_runq;
1940 queue_t queue = rq->queues + thread->sched_pri;
1941 uint64_t deadline = thread->realtime.deadline;
1942 boolean_t preempt = FALSE;
1c79356b 1943
2d21ac55 1944 simple_lock(&rt_lock);
1c79356b 1945
55e303ae
A
1946 if (queue_empty(queue)) {
1947 enqueue_tail(queue, (queue_entry_t)thread);
1948
2d21ac55
A
1949 setbit(MAXPRI - thread->sched_pri, rq->bitmap);
1950 if (thread->sched_pri > rq->highq)
1951 rq->highq = thread->sched_pri;
1952 preempt = TRUE;
55e303ae
A
1953 }
1954 else {
1955 register thread_t entry = (thread_t)queue_first(queue);
1956
1957 while (TRUE) {
1958 if ( queue_end(queue, (queue_entry_t)entry) ||
1959 deadline < entry->realtime.deadline ) {
1960 entry = (thread_t)queue_prev((queue_entry_t)entry);
1961 break;
1962 }
1963
1964 entry = (thread_t)queue_next((queue_entry_t)entry);
1965 }
1966
1967 if ((queue_entry_t)entry == queue)
2d21ac55 1968 preempt = TRUE;
55e303ae
A
1969
1970 insque((queue_entry_t)thread, (queue_entry_t)entry);
1971 }
1972
2d21ac55 1973 thread->runq = RT_RUNQ;
55e303ae
A
1974 assert(thread->sched_mode & TH_MODE_PREEMPT);
1975 rq->count++; rq->urgency++;
1976
2d21ac55 1977 simple_unlock(&rt_lock);
55e303ae 1978
2d21ac55
A
1979 return (preempt);
1980}
55e303ae 1981
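/*
 *	Worked example of the deadline ordering: threads at the same
 *	realtime priority are kept in ascending deadline order, so with
 *	queued deadlines of 10 and 30 a new thread with deadline 20 is
 *	inserted between them, while one with deadline 5 becomes the new
 *	queue head and causes realtime_queue_insert() to report a
 *	preemption.
 */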
2d21ac55
A
1982/*
1983 * realtime_setrun:
1984 *
1985 * Dispatch a thread for realtime execution.
1986 *
1987 * Thread must be locked. Associated pset must
1988 * be locked, and is returned unlocked.
1989 */
1990static void
1991realtime_setrun(
1992 processor_t processor,
1993 thread_t thread)
1994{
1995 processor_set_t pset = processor->processor_set;
55e303ae 1996
2d21ac55
A
1997 /*
1998 * Dispatch directly onto idle processor.
1999 */
2000 if (processor->state == PROCESSOR_IDLE) {
2001 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2002 pset->idle_count--;
2003 enqueue_head(&pset->active_queue, (queue_entry_t)processor);
55e303ae 2004
2d21ac55
A
2005 processor->next_thread = thread;
2006 processor->deadline = thread->realtime.deadline;
2007 processor->state = PROCESSOR_DISPATCHING;
2008 pset_unlock(pset);
55e303ae 2009
2d21ac55
A
2010 if (processor != current_processor())
2011 machine_signal_idle(processor);
2012 return;
2013 }
55e303ae 2014
2d21ac55
A
2015 if (realtime_queue_insert(thread)) {
2016 if (processor == current_processor())
2017 ast_on(AST_PREEMPT | AST_URGENT);
2018 else
2019 cause_ast_check(processor);
2020 }
2021
2022 pset_unlock(pset);
2023}
2024
2025/*
2026 * processor_enqueue:
2027 *
2028 * Enqueue thread on a processor run queue. Thread must be locked,
2029 * and not already be on a run queue.
2030 *
2031 * Returns TRUE if a preemption is indicated based on the state
2032 * of the run queue.
2033 *
2034 * The run queue must be locked (see run_queue_remove()
2035 * for more info).
2036 */
2037static boolean_t
2038processor_enqueue(
2039 processor_t processor,
2040 thread_t thread,
2041 integer_t options)
2042{
2043 run_queue_t rq = &processor->runq;
2044 queue_t queue = rq->queues + thread->sched_pri;
2045 boolean_t result = FALSE;
2046
2047 if (queue_empty(queue)) {
2048 enqueue_tail(queue, (queue_entry_t)thread);
2049
2050 setbit(MAXPRI - thread->sched_pri, rq->bitmap);
2051 if (thread->sched_pri > rq->highq) {
2052 rq->highq = thread->sched_pri;
2053 result = TRUE;
55e303ae
A
2054 }
2055 }
2d21ac55
A
2056 else
2057 if (options & SCHED_TAILQ)
2058 enqueue_tail(queue, (queue_entry_t)thread);
2059 else
2060 enqueue_head(queue, (queue_entry_t)thread);
55e303ae 2061
2d21ac55
A
2062 thread->runq = processor;
2063 if (thread->sched_mode & TH_MODE_PREEMPT)
2064 rq->urgency++;
2065 rq->count++;
2066
2067 return (result);
55e303ae
A
2068}
2069
2070/*
2d21ac55 2071 * processor_setrun:
55e303ae 2072 *
2d21ac55
A
2073 * Dispatch a thread for execution on a
2074 * processor.
55e303ae 2075 *
2d21ac55
A
2076 * Thread must be locked. Associated pset must
2077 * be locked, and is returned unlocked.
55e303ae 2078 */
2d21ac55
A
2079static void
2080processor_setrun(
2081 processor_t processor,
2082 thread_t thread,
2083 integer_t options)
55e303ae 2084{
2d21ac55
A
2085 processor_set_t pset = processor->processor_set;
2086 ast_t preempt;
55e303ae 2087
55e303ae 2088 /*
2d21ac55 2089 * Dispatch directly onto idle processor.
55e303ae 2090 */
2d21ac55
A
2091 if (processor->state == PROCESSOR_IDLE) {
2092 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2093 pset->idle_count--;
2094 enqueue_head(&pset->active_queue, (queue_entry_t)processor);
2095
2096 processor->next_thread = thread;
2097 processor->deadline = UINT64_MAX;
2098 processor->state = PROCESSOR_DISPATCHING;
2099 pset_unlock(pset);
2100
2101 if (processor != current_processor())
2102 machine_signal_idle(processor);
2103 return;
2104 }
55e303ae
A
2105
2106 /*
2d21ac55 2107 * Set preemption mode.
1c79356b 2108 */
2d21ac55 2109 if (thread->sched_mode & TH_MODE_PREEMPT)
55e303ae 2110 preempt = (AST_PREEMPT | AST_URGENT);
2d21ac55
A
2111 else
2112 if (thread->sched_mode & TH_MODE_TIMESHARE && thread->priority < BASEPRI_BACKGROUND)
2113 preempt = AST_NONE;
2114 else
2115 preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
9bccf70c 2116
2d21ac55
A
2117 if (!processor_enqueue(processor, thread, options))
2118 preempt = AST_NONE;
9bccf70c 2119
2d21ac55
A
2120 pset_hint_high(pset, processor);
2121
2122 if (preempt != AST_NONE) {
2123 if (processor == current_processor()) {
2124 thread_t self = processor->active_thread;
2125
2126 if (csw_needed(self, processor))
2127 ast_on(preempt);
9bccf70c
A
2128 }
2129 else
2d21ac55
A
2130 if ( (processor->state == PROCESSOR_RUNNING ||
2131 processor->state == PROCESSOR_SHUTDOWN) &&
2132 thread->sched_pri >= processor->current_pri ) {
2133 cause_ast_check(processor);
2134 }
2135 }
2136 else
2137 if ( processor->state == PROCESSOR_SHUTDOWN &&
2138 thread->sched_pri >= processor->current_pri ) {
2139 cause_ast_check(processor);
2140 }
2141
2142 pset_unlock(pset);
2143}
9bccf70c 2144
2d21ac55
A
2145#define next_pset(p) (((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets)
2146
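/*
 *	next_pset() advances to the following processor set in the node's
 *	list, wrapping back to the head of the list at the end, so that
 *	repeated calls traverse the node's processor sets as a ring.
 */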
2147/*
2148 * choose_next_pset:
2149 *
2150 * Return the next sibling pset containing
2151 * available processors.
2152 *
2153 * Returns the original pset if none other is
2154 * suitable.
2155 */
2156static processor_set_t
2157choose_next_pset(
2158 processor_set_t pset)
2159{
2160 processor_set_t nset = pset;
2161
2162 do {
2163 nset = next_pset(nset);
2164 } while (nset->processor_count < 1 && nset != pset);
2165
2166 return ((nset != pset)? nset: pset);
2167}
2168
2169/*
2170 * choose_processor:
2171 *
2172 * Choose a processor for the thread, beginning at
2173 * the pset.
2174 *
2175 * Returns a processor, possibly from a different pset.
2176 *
2177 * The thread must be locked. The pset must be locked,
2178 * and the resulting pset is locked on return.
2179 */
2180static processor_t
2181choose_processor(
2182 processor_set_t pset,
2183 thread_t thread)
2184{
2185 processor_set_t nset, cset = pset;
2186 processor_t processor;
2187
2188 /*
2189 * Iterate through the processor sets to locate
2190 * an appropriate processor.
2191 */
2192 do {
9bccf70c 2193 /*
2d21ac55 2194 * Choose an idle processor.
9bccf70c 2195 */
2d21ac55
A
2196 if (!queue_empty(&cset->idle_queue))
2197 return ((processor_t)queue_first(&cset->idle_queue));
1c79356b 2198
2d21ac55
A
2199 if (thread->sched_pri >= BASEPRI_RTQUEUES) {
2200 /*
2201 * For an RT thread, iterate through active processors, first fit.
2202 */
2203 processor = (processor_t)queue_first(&cset->active_queue);
2204 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
2205 if (thread->sched_pri > processor->current_pri ||
2206 thread->realtime.deadline < processor->deadline)
2207 return (processor);
2208
2209 processor = (processor_t)queue_next((queue_entry_t)processor);
2210 }
2211 }
55e303ae 2212 else {
2d21ac55
A
2213 /*
2214 * Choose the low hint processor in the processor set if available.
2215 */
2216 processor = cset->low_hint;
2217 if (processor != PROCESSOR_NULL &&
2218 processor->state != PROCESSOR_SHUTDOWN && processor->state != PROCESSOR_OFF_LINE)
2219 return (processor);
9bccf70c 2220
9bccf70c 2221 /*
2d21ac55 2222 * Choose any active processor if the hint was invalid.
1c79356b 2223 */
2d21ac55
A
2224 processor = (processor_t)dequeue_head(&cset->active_queue);
2225 if (processor != PROCESSOR_NULL) {
2226 enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
2227 return (processor);
2228 }
2229 }
2230
2231 /*
2232 * Move onto the next processor set.
2233 */
2234 nset = next_pset(cset);
2235
2236 if (nset != pset) {
2237 pset_unlock(cset);
2238
2239 cset = nset;
2240 pset_lock(cset);
2241 }
2242 } while (nset != pset);
2243
2244 /*
 2245	 * If all else fails, choose the current processor;
 2246	 * this routine must return a running processor.
2247 */
2248 processor = current_processor();
2249 if (cset != processor->processor_set) {
2250 pset_unlock(cset);
2251
2252 cset = processor->processor_set;
2253 pset_lock(cset);
2254 }
2255
2256 return (processor);
2257}
2258
2259/*
2260 * thread_setrun:
2261 *
2262 * Dispatch thread for execution, onto an idle
2263 * processor or run queue, and signal a preemption
2264 * as appropriate.
2265 *
2266 * Thread must be locked.
2267 */
2268void
2269thread_setrun(
2270 thread_t thread,
2271 integer_t options)
2272{
2273 processor_t processor;
2274 processor_set_t pset;
2275
2276#if DEBUG
2277 assert(thread_runnable(thread));
2278#endif
55e303ae 2279
2d21ac55
A
2280 /*
2281 * Update priority if needed.
2282 */
2283 if (thread->sched_stamp != sched_tick)
2284 update_priority(thread);
2285
2286 assert(thread->runq == PROCESSOR_NULL);
2287
2288 if (thread->bound_processor == PROCESSOR_NULL) {
2289 /*
2290 * Unbound case.
2291 */
2292 if (thread->affinity_set != AFFINITY_SET_NULL) {
2293 /*
2294 * Use affinity set policy hint.
2295 */
2296 pset = thread->affinity_set->aset_pset;
2297 pset_lock(pset);
2298
2299 processor = choose_processor(pset, thread);
2300 }
2301 else
2302 if (thread->last_processor != PROCESSOR_NULL) {
2303 /*
2304 * Simple (last processor) affinity case.
2305 */
2306 processor = thread->last_processor;
2307 pset = processor->processor_set;
2308 pset_lock(pset);
2309
9bccf70c 2310 /*
2d21ac55 2311 * Choose a different processor in certain cases.
9bccf70c 2312 */
2d21ac55
A
2313 if (processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
2314 processor = choose_processor(pset, thread);
2315 else
2316 if (thread->sched_pri >= BASEPRI_RTQUEUES) {
9bccf70c 2317 /*
2d21ac55
A
2318 * If the processor is executing an RT thread with
2319 * an earlier deadline, choose another.
9bccf70c 2320 */
2d21ac55
A
2321 if (thread->sched_pri <= processor->current_pri ||
2322 thread->realtime.deadline >= processor->deadline)
2323 processor = choose_processor(pset, thread);
2324 }
2325 else
2326 if (processor->state != PROCESSOR_IDLE && pset->idle_count > 0) {
2327 processor = choose_processor(pset, thread);
2328 }
2329 else {
2330 processor_set_t nset = choose_next_pset(pset);
9bccf70c
A
2331
2332 /*
2d21ac55 2333 * Bump into a lesser loaded processor set if appropriate.
9bccf70c 2334 */
2d21ac55
A
2335 if (pset != nset && (nset->low_hint == PROCESSOR_NULL ||
2336 (pset->idle_count == 0 && nset->idle_count > 0) ||
2337 processor->runq.count > nset->low_hint->runq.count)) {
2338 pset_unlock(pset);
9bccf70c 2339
2d21ac55
A
2340 pset = nset;
2341 pset_lock(pset);
55e303ae 2342
2d21ac55 2343 processor = choose_processor(pset, thread);
9bccf70c
A
2344 }
2345 }
2d21ac55
A
2346 }
2347 else {
2348 /*
2349 * No Affinity case:
2350 *
2351 * Choose a processor from the current processor set.
2352 */
2353 processor = current_processor();
2354 pset = processor->processor_set;
2355 pset_lock(pset);
9bccf70c 2356
2d21ac55 2357 processor = choose_processor(pset, thread);
55e303ae 2358 }
1c79356b
A
2359 }
2360 else {
2d21ac55
A
2361 /*
2362 * Bound case:
2363 *
2364 * Unconditionally dispatch on the processor.
2365 */
2366 processor = thread->bound_processor;
55e303ae 2367 pset = processor->processor_set;
2d21ac55
A
2368 pset_lock(pset);
2369 }
2370
2371 /*
 2372	 * Dispatch the thread on the chosen processor.
2373 */
2374 if (thread->sched_pri >= BASEPRI_RTQUEUES)
2375 realtime_setrun(processor, thread);
2376 else
2377 processor_setrun(processor, thread, options);
2378}
2379
2380/*
2381 * processor_queue_shutdown:
2382 *
2383 * Shutdown a processor run queue by moving
2384 * non-bound threads to the current processor.
2385 *
2386 * Associated pset must be locked, and is
2387 * returned unlocked.
2388 */
2389void
2390processor_queue_shutdown(
2391 processor_t processor)
2392{
2393 processor_set_t pset = processor->processor_set;
2394 run_queue_t rq = &processor->runq;
2395 queue_t queue = rq->queues + rq->highq;
2396 int pri = rq->highq, count = rq->count;
2397 thread_t next, thread;
2398 queue_head_t tqueue;
2399
2400 queue_init(&tqueue);
2401
2402 while (count > 0) {
2403 thread = (thread_t)queue_first(queue);
2404 while (!queue_end(queue, (queue_entry_t)thread)) {
2405 next = (thread_t)queue_next((queue_entry_t)thread);
2406
2407 if (thread->bound_processor != processor) {
2408 remqueue(queue, (queue_entry_t)thread);
2409
2410 thread->runq = PROCESSOR_NULL;
2411 rq->count--;
2412 if (thread->sched_mode & TH_MODE_PREEMPT)
2413 rq->urgency--;
2414 if (queue_empty(queue)) {
2415 if (pri != IDLEPRI)
2416 clrbit(MAXPRI - pri, rq->bitmap);
2417 rq->highq = MAXPRI - ffsbit(rq->bitmap);
9bccf70c 2418 }
2d21ac55
A
2419
2420 enqueue_tail(&tqueue, (queue_entry_t)thread);
9bccf70c 2421 }
2d21ac55
A
2422 count--;
2423
2424 thread = next;
9bccf70c 2425 }
55e303ae 2426
2d21ac55
A
2427 queue--; pri--;
2428 }
2429
2430 pset_unlock(pset);
2431
2432 processor = current_processor();
2433 pset = processor->processor_set;
2434
2435 while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
2436 thread_lock(thread);
2437 thread->last_processor = PROCESSOR_NULL;
55e303ae 2438
2d21ac55
A
2439 pset_lock(pset);
2440
2441 processor_enqueue(processor, thread, SCHED_TAILQ);
2442
2443 pset_unlock(pset);
2444
2445 thread_unlock(thread);
9bccf70c
A
2446 }
2447}
2448
2449/*
55e303ae
A
2450 * Check for a possible preemption point in
2451 * the (current) thread.
2452 *
2453 * Called at splsched.
9bccf70c
A
2454 */
2455ast_t
2456csw_check(
2457 thread_t thread,
2458 processor_t processor)
2459{
2460 int current_pri = thread->sched_pri;
2461 ast_t result = AST_NONE;
2462 run_queue_t runq;
2463
55e303ae 2464 if (first_timeslice(processor)) {
2d21ac55 2465 runq = &rt_runq;
55e303ae
A
2466 if (runq->highq >= BASEPRI_RTQUEUES)
2467 return (AST_PREEMPT | AST_URGENT);
2468
9bccf70c
A
2469 if (runq->highq > current_pri) {
2470 if (runq->urgency > 0)
55e303ae 2471 return (AST_PREEMPT | AST_URGENT);
9bccf70c 2472
55e303ae 2473 result |= AST_PREEMPT;
9bccf70c
A
2474 }
2475
2476 runq = &processor->runq;
2477 if (runq->highq > current_pri) {
2478 if (runq->urgency > 0)
55e303ae 2479 return (AST_PREEMPT | AST_URGENT);
9bccf70c 2480
55e303ae 2481 result |= AST_PREEMPT;
9bccf70c
A
2482 }
2483 }
2484 else {
2d21ac55 2485 runq = &rt_runq;
9bccf70c
A
2486 if (runq->highq >= current_pri) {
2487 if (runq->urgency > 0)
55e303ae 2488 return (AST_PREEMPT | AST_URGENT);
9bccf70c 2489
55e303ae 2490 result |= AST_PREEMPT;
9bccf70c
A
2491 }
2492
2493 runq = &processor->runq;
2494 if (runq->highq >= current_pri) {
2495 if (runq->urgency > 0)
55e303ae 2496 return (AST_PREEMPT | AST_URGENT);
9bccf70c 2497
55e303ae 2498 result |= AST_PREEMPT;
9bccf70c 2499 }
1c79356b 2500 }
9bccf70c
A
2501
2502 if (result != AST_NONE)
2503 return (result);
2504
2505 if (thread->state & TH_SUSP)
55e303ae 2506 result |= AST_PREEMPT;
9bccf70c
A
2507
2508 return (result);
1c79356b
A
2509}
2510
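/*
 *	Note on the comparisons in csw_check() above: while the current
 *	thread still has quantum remaining (first_timeslice()), a
 *	preemption is requested only for a strictly higher-priority
 *	runnable thread; once the quantum has expired an equal-priority
 *	thread is sufficient, giving round-robin behavior among threads
 *	of equal priority.
 */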
2511/*
9bccf70c 2512 * set_sched_pri:
1c79356b 2513 *
55e303ae
A
2514 * Set the scheduled priority of the specified thread.
2515 *
9bccf70c 2516 * This may cause the thread to change queues.
1c79356b 2517 *
55e303ae 2518 * Thread must be locked.
1c79356b
A
2519 */
2520void
9bccf70c 2521set_sched_pri(
2d21ac55
A
2522 thread_t thread,
2523 int priority)
1c79356b 2524{
2d21ac55 2525 boolean_t removed = run_queue_remove(thread);
9bccf70c
A
2526
2527 if ( !(thread->sched_mode & TH_MODE_TIMESHARE) &&
2528 (priority >= BASEPRI_PREEMPT ||
2529 (thread->task_priority < MINPRI_KERNEL &&
2530 thread->task_priority >= BASEPRI_BACKGROUND &&
91447636 2531 priority > thread->task_priority) ) )
9bccf70c
A
2532 thread->sched_mode |= TH_MODE_PREEMPT;
2533 else
2534 thread->sched_mode &= ~TH_MODE_PREEMPT;
1c79356b 2535
9bccf70c 2536 thread->sched_pri = priority;
2d21ac55 2537 if (removed)
55e303ae 2538 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
9bccf70c 2539 else
55e303ae 2540 if (thread->state & TH_RUN) {
9bccf70c
A
2541 processor_t processor = thread->last_processor;
2542
2543 if (thread == current_thread()) {
2544 ast_t preempt = csw_check(thread, processor);
2545
2546 if (preempt != AST_NONE)
2547 ast_on(preempt);
2548 processor->current_pri = priority;
2549 }
2550 else
2551 if ( processor != PROCESSOR_NULL &&
55e303ae 2552 processor->active_thread == thread )
9bccf70c 2553 cause_ast_check(processor);
1c79356b
A
2554 }
2555}
2556
91447636
A
2557#if 0
2558
2559static void
2560run_queue_check(
2561 run_queue_t rq,
2562 thread_t thread)
2563{
2564 queue_t q;
2565 queue_entry_t qe;
2566
2567 if (rq != thread->runq)
2568 panic("run_queue_check: thread runq");
2569
2570 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
2571 panic("run_queue_check: thread sched_pri");
2572
2573 q = &rq->queues[thread->sched_pri];
2574 qe = queue_first(q);
2575 while (!queue_end(q, qe)) {
2576 if (qe == (queue_entry_t)thread)
2577 return;
2578
2579 qe = queue_next(qe);
2580 }
2581
2582 panic("run_queue_check: end");
2583}
2584
2585#endif /* 0 */
2586
1c79356b 2587/*
55e303ae 2588 * run_queue_remove:
1c79356b 2589 *
2d21ac55
A
2590 * Remove a thread from a current run queue and
2591 * return TRUE if successful.
55e303ae
A
2592 *
2593 * Thread must be locked.
1c79356b 2594 */
2d21ac55 2595boolean_t
55e303ae 2596run_queue_remove(
2d21ac55 2597 thread_t thread)
1c79356b 2598{
2d21ac55 2599 processor_t processor = thread->runq;
1c79356b 2600
1c79356b 2601 /*
2d21ac55 2602 * If processor is PROCESSOR_NULL, the thread will stay out of the
55e303ae
A
2603 * run queues because the caller locked the thread. Otherwise
2604 * the thread is on a run queue, but could be chosen for dispatch
2605 * and removed.
1c79356b 2606 */
2d21ac55
A
2607 if (processor != PROCESSOR_NULL) {
2608 void * rqlock;
2609 run_queue_t rq;
55e303ae
A
2610
2611 /*
2d21ac55
A
2612 * The processor run queues are locked by the
2613 * processor set. Real-time priorities use a
2614 * global queue with a dedicated lock.
55e303ae 2615 */
2d21ac55
A
2616 if (thread->sched_pri < BASEPRI_RTQUEUES) {
2617 rqlock = &processor->processor_set->sched_lock;
2618 rq = &processor->runq;
2619 }
2620 else {
2621 rqlock = &rt_lock; rq = &rt_runq;
55e303ae
A
2622 }
2623
2d21ac55 2624 simple_lock(rqlock);
55e303ae 2625
2d21ac55 2626 if (processor == thread->runq) {
1c79356b 2627 /*
55e303ae
A
2628 * Thread is on a run queue and we have a lock on
2629 * that run queue.
1c79356b 2630 */
1c79356b
A
2631 remqueue(&rq->queues[0], (queue_entry_t)thread);
2632 rq->count--;
9bccf70c
A
2633 if (thread->sched_mode & TH_MODE_PREEMPT)
2634 rq->urgency--;
2635 assert(rq->urgency >= 0);
1c79356b
A
2636
2637 if (queue_empty(rq->queues + thread->sched_pri)) {
2638 /* update run queue status */
2639 if (thread->sched_pri != IDLEPRI)
2640 clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
2641 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2642 }
55e303ae 2643
2d21ac55 2644 thread->runq = PROCESSOR_NULL;
1c79356b
A
2645 }
2646 else {
2647 /*
55e303ae
A
2648 * The thread left the run queue before we could
2649 * lock the run queue.
1c79356b 2650 */
2d21ac55
A
2651 assert(thread->runq == PROCESSOR_NULL);
2652 processor = PROCESSOR_NULL;
1c79356b 2653 }
55e303ae 2654
2d21ac55 2655 simple_unlock(rqlock);
1c79356b
A
2656 }
2657
2d21ac55 2658 return (processor != PROCESSOR_NULL);
1c79356b
A
2659}
2660
1c79356b
A
2661/*
2662 * choose_thread:
2663 *
2d21ac55
A
2664 * Choose a thread to execute from the run queues
2665 * and return it. May steal a thread from another
2666 * processor.
1c79356b 2667 *
2d21ac55
A
2668 * Called with pset scheduling lock and rt lock held,
2669 * released on return.
1c79356b 2670 */
55e303ae 2671static thread_t
1c79356b 2672choose_thread(
55e303ae 2673 processor_t processor)
1c79356b 2674{
2d21ac55
A
2675 processor_set_t pset = processor->processor_set;
2676 thread_t thread;
1c79356b 2677
2d21ac55
A
2678 if (processor->runq.count > 0 && processor->runq.highq >= rt_runq.highq) {
2679 simple_unlock(&rt_lock);
1c79356b 2680
2d21ac55 2681 pset_hint_low(pset, processor);
1c79356b 2682
2d21ac55
A
2683 if (pset->high_hint != PROCESSOR_NULL) {
2684 if (processor != pset->high_hint) {
2685 if (processor->runq.count >= pset->high_hint->runq.count)
2686 pset->high_hint = processor;
2687 else
2688 if (pset->high_hint->runq.highq > processor->runq.highq) {
2689 thread = steal_thread(pset->high_hint);
2690 if (thread != THREAD_NULL) {
2691 processor->deadline = UINT64_MAX;
2692 pset_unlock(pset);
1c79356b 2693
2d21ac55
A
2694 return (thread);
2695 }
2696 }
2697 }
2698 }
2699 else
2700 pset->high_hint = processor;
9bccf70c 2701
2d21ac55 2702 thread = run_queue_dequeue(&processor->runq, SCHED_HEADQ);
1c79356b 2703
55e303ae 2704 processor->deadline = UINT64_MAX;
2d21ac55
A
2705 pset_unlock(pset);
2706
2707 return (thread);
1c79356b 2708 }
1c79356b 2709
2d21ac55
A
2710 thread = run_queue_dequeue(&rt_runq, SCHED_HEADQ);
2711 simple_unlock(&rt_lock);
2712
2713 processor->deadline = thread->realtime.deadline;
2714 pset_unlock(pset);
55e303ae
A
2715
2716 return (thread);
1c79356b
A
2717}
2718
2d21ac55
A
2719/*
2720 * steal_thread:
2721 *
2722 * Steal a thread from a processor and return it.
2723 *
2724 * Associated pset must be locked. Returns THREAD_NULL
2725 * on failure.
2726 */
2727static thread_t
2728steal_thread(
2729 processor_t processor)
91447636 2730{
2d21ac55
A
2731 run_queue_t rq = &processor->runq;
2732 queue_t queue = rq->queues + rq->highq;
2733 int pri = rq->highq, count = rq->count;
2734 thread_t thread = THREAD_NULL;
2735
2736 while (count > 0) {
2737 thread = (thread_t)queue_first(queue);
2738 while (!queue_end(queue, (queue_entry_t)thread)) {
2739 if (thread->bound_processor != processor) {
2740 remqueue(queue, (queue_entry_t)thread);
2741
2742 thread->runq = PROCESSOR_NULL;
2743 rq->count--;
2744 if (thread->sched_mode & TH_MODE_PREEMPT)
2745 rq->urgency--;
2746 if (queue_empty(queue)) {
2747 if (pri != IDLEPRI)
2748 clrbit(MAXPRI - pri, rq->bitmap);
2749 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2750 }
91447636 2751
2d21ac55
A
2752 return (thread);
2753 }
2754 count--;
91447636 2755
2d21ac55 2756 thread = (thread_t)queue_next((queue_entry_t)thread);
91447636 2757 }
91447636 2758
2d21ac55
A
2759 queue--; pri--;
2760 }
91447636 2761
2d21ac55 2762 return (THREAD_NULL);
91447636
A
2763}
2764
1c79356b 2765/*
2d21ac55
A
2766 * This is the processor idle loop, which just looks for other threads
2767 * to execute. Processor idle threads invoke this without supplying a
2768 * current thread to idle without an asserted wait state.
2769 *
 2770 * Returns the next thread to execute if dispatched directly.
1c79356b 2771 */
2d21ac55
A
2772static thread_t
2773processor_idle(
2774 thread_t thread,
2775 processor_t processor)
1c79356b 2776{
2d21ac55
A
2777 processor_set_t pset = processor->processor_set;
2778 thread_t new_thread;
2779 int state;
1c79356b 2780
2d21ac55 2781 (void)splsched();
1c79356b 2782
2d21ac55
A
2783#ifdef __ppc__
2784 pmsDown(); /* Step power down */
2785#endif
3a60a9f5 2786
2d21ac55
A
2787 KERNEL_DEBUG_CONSTANT(
2788 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, (int)thread, 0, 0, 0, 0);
3a60a9f5 2789
2d21ac55
A
2790 timer_switch(&PROCESSOR_DATA(processor, system_state),
2791 mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
2792 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);
3a60a9f5 2793
2d21ac55
A
2794 while (processor->next_thread == THREAD_NULL && processor->runq.count == 0 &&
2795 (thread == THREAD_NULL || ((thread->state & (TH_WAIT|TH_SUSP)) == TH_WAIT && !thread->wake_active))) {
2796 volatile processor_t hint;
de355530 2797
2d21ac55 2798 machine_idle();
55e303ae
A
2799
2800 (void)splsched();
2d21ac55
A
2801
2802 if (pset->low_hint == PROCESSOR_NULL)
2803 break;
2804
2805 hint = pset->high_hint;
2806 if (hint != PROCESSOR_NULL && hint->runq.count > 0)
2807 break;
55e303ae
A
2808 }
2809
2d21ac55
A
2810 KERNEL_DEBUG_CONSTANT(
2811 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, 0, 0, 0, 0);
2812
2813 timer_switch(&PROCESSOR_DATA(processor, idle_state),
2814 mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
2815 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
1c79356b 2816
2d21ac55
A
2817 pset_lock(pset);
2818
2819#ifdef __ppc__
2820 pmsStep(0); /* Step up out of idle power */
2821#endif
3a60a9f5 2822
55e303ae
A
2823 state = processor->state;
2824 if (state == PROCESSOR_DISPATCHING) {
1c79356b 2825 /*
55e303ae 2826		 * Common case -- cpu dispatched.
1c79356b 2827 */
2d21ac55
A
2828 new_thread = processor->next_thread;
2829 processor->next_thread = THREAD_NULL;
55e303ae 2830 processor->state = PROCESSOR_RUNNING;
1c79356b 2831
2d21ac55
A
2832 if ( processor->runq.highq > new_thread->sched_pri ||
2833 rt_runq.highq >= new_thread->sched_pri ) {
2834 processor->deadline = UINT64_MAX;
55e303ae 2835
2d21ac55 2836 pset_unlock(pset);
1c79356b 2837
2d21ac55
A
2838 thread_lock(new_thread);
2839 thread_setrun(new_thread, SCHED_HEADQ);
2840 thread_unlock(new_thread);
55e303ae 2841
2d21ac55 2842 return (THREAD_NULL);
1c79356b 2843 }
1c79356b 2844
2d21ac55
A
2845 pset_unlock(pset);
2846
2847 return (new_thread);
55e303ae
A
2848 }
2849 else
2850 if (state == PROCESSOR_IDLE) {
55e303ae 2851 remqueue(&pset->idle_queue, (queue_entry_t)processor);
2d21ac55 2852 pset->idle_count--;
1c79356b 2853
2d21ac55
A
2854 processor->state = PROCESSOR_RUNNING;
2855 enqueue_head(&pset->active_queue, (queue_entry_t)processor);
1c79356b 2856 }
55e303ae
A
2857 else
2858 if (state == PROCESSOR_SHUTDOWN) {
2859 /*
2860 * Going off-line. Force a
2861 * reschedule.
2862 */
2d21ac55
A
2863 if ((new_thread = processor->next_thread) != THREAD_NULL) {
2864 processor->next_thread = THREAD_NULL;
55e303ae 2865 processor->deadline = UINT64_MAX;
2d21ac55
A
2866
2867 pset_unlock(pset);
55e303ae
A
2868
2869 thread_lock(new_thread);
2870 thread_setrun(new_thread, SCHED_HEADQ);
2871 thread_unlock(new_thread);
55e303ae 2872
2d21ac55
A
2873 return (THREAD_NULL);
2874 }
55e303ae
A
2875 }
2876
2d21ac55
A
2877 pset_unlock(pset);
2878
2879 return (THREAD_NULL);
2880}
2881
2882void
2883idle_thread(void)
2884{
2885 processor_t processor = current_processor();
2886 thread_t new_thread;
2887
2888 new_thread = processor_idle(THREAD_NULL, processor);
2889 if (new_thread != THREAD_NULL) {
2890 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2891 /*NOTREACHED*/
2892 }
55e303ae 2893
2d21ac55 2894 thread_block((thread_continue_t)idle_thread);
55e303ae 2895 /*NOTREACHED*/
1c79356b
A
2896}
2897
91447636
A
2898kern_return_t
2899idle_thread_create(
2900 processor_t processor)
1c79356b 2901{
91447636
A
2902 kern_return_t result;
2903 thread_t thread;
2904 spl_t s;
2905
2906 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
2907 if (result != KERN_SUCCESS)
2908 return (result);
2909
2910 s = splsched();
2911 thread_lock(thread);
2912 thread->bound_processor = processor;
2913 processor->idle_thread = thread;
2914 thread->sched_pri = thread->priority = IDLEPRI;
2915 thread->state = (TH_RUN | TH_IDLE);
2916 thread_unlock(thread);
2917 splx(s);
2918
2919 thread_deallocate(thread);
2920
2921 return (KERN_SUCCESS);
1c79356b
A
2922}
2923
55e303ae 2924static uint64_t sched_tick_deadline;
0b4e3aa0 2925
91447636
A
2926/*
2927 * sched_startup:
2928 *
2929 * Kicks off scheduler services.
2930 *
2931 * Called at splsched.
2932 */
0b4e3aa0 2933void
91447636 2934sched_startup(void)
0b4e3aa0 2935{
91447636
A
2936 kern_return_t result;
2937 thread_t thread;
2938
2939 result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
2940 if (result != KERN_SUCCESS)
2941 panic("sched_startup");
2942
2943 thread_deallocate(thread);
2944
2945 /*
2946 * Yield to the sched_tick_thread while it times
2947 * a series of context switches back. It stores
2948 * the baseline value in sched_cswtime.
2949 *
2950 * The current thread is the only other thread
2951 * active at this point.
2952 */
2953 while (sched_cswtime == 0)
2954 thread_block(THREAD_CONTINUE_NULL);
2955
2956 thread_daemon_init();
2957
2958 thread_call_initialize();
0b4e3aa0 2959}
1c79356b
A
2960
2961/*
91447636 2962 * sched_tick_thread:
1c79356b 2963 *
55e303ae
A
2964 * Perform periodic bookkeeping functions about ten
2965 * times per second.
1c79356b 2966 */
91447636
A
2967static void
2968sched_tick_continue(void)
1c79356b 2969{
91447636 2970 uint64_t abstime = mach_absolute_time();
1c79356b 2971
91447636 2972 sched_tick++;
1c79356b
A
2973
2974 /*
91447636 2975 * Compute various averages.
1c79356b 2976 */
91447636 2977 compute_averages();
1c79356b
A
2978
2979 /*
91447636
A
2980 * Scan the run queues for threads which
2981 * may need to be updated.
1c79356b 2982 */
91447636 2983 thread_update_scan();
1c79356b 2984
2d21ac55
A
2985 if (pm_tick_callout != NULL)
2986 (*pm_tick_callout)();
2987
1c79356b
A
2988 clock_deadline_for_periodic_event(sched_tick_interval, abstime,
2989 &sched_tick_deadline);
2990
91447636
A
2991 assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
2992 thread_block((thread_continue_t)sched_tick_continue);
1c79356b
A
2993 /*NOTREACHED*/
2994}
2995
91447636
A
2996/*
2997 * Time a series of context switches to determine
2998 * a baseline. Toss the high and low and return
2999 * the one-way value.
3000 */
3001static uint32_t
3002time_cswitch(void)
3003{
3004 uint32_t new, hi, low, accum;
3005 uint64_t abstime;
3006 int i, tries = 7;
3007
3008 accum = hi = low = 0;
3009 for (i = 0; i < tries; ++i) {
3010 abstime = mach_absolute_time();
3011 thread_block(THREAD_CONTINUE_NULL);
3012
3013 new = mach_absolute_time() - abstime;
3014
3015 if (i == 0)
3016 accum = hi = low = new;
3017 else {
3018 if (new < low)
3019 low = new;
3020 else
3021 if (new > hi)
3022 hi = new;
3023 accum += new;
3024 }
3025 }
3026
3027 return ((accum - hi - low) / (2 * (tries - 2)));
3028}
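/*
 *	Worked example: with tries == 7 the highest and lowest samples
 *	are discarded, leaving five.  Each sample spans a switch away and
 *	back (two context switches), so the remaining sum is divided by
 *	2 * (7 - 2) == 10 to yield the one-way baseline.
 */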
3029
1c79356b
A
3030void
3031sched_tick_thread(void)
3032{
91447636
A
3033 sched_cswtime = time_cswitch();
3034
55e303ae 3035 sched_tick_deadline = mach_absolute_time();
1c79356b 3036
91447636 3037 sched_tick_continue();
1c79356b
A
3038 /*NOTREACHED*/
3039}
3040
1c79356b 3041/*
91447636 3042 * thread_update_scan / runq_scan:
55e303ae 3043 *
91447636
A
3044 * Scan the run queues to account for timesharing threads
3045 * which need to be updated.
1c79356b
A
3046 *
3047 * Scanner runs in two passes. Pass one squirrels likely
91447636 3048 * threads away in an array, pass two does the update.
1c79356b 3049 *
91447636
A
3050 * This is necessary because the run queue is locked for
3051 * the candidate scan, but the thread is locked for the update.
1c79356b 3052 *
91447636
A
3053 * Array should be sized to make forward progress, without
3054 * disabling preemption for long periods.
1c79356b 3055 */
55e303ae 3056
91447636 3057#define THREAD_UPDATE_SIZE 128
55e303ae 3058
91447636
A
3059static thread_t thread_update_array[THREAD_UPDATE_SIZE];
3060static int thread_update_count = 0;
1c79356b
A
3061
3062/*
91447636
A
3063 * Scan a runq for candidate threads.
3064 *
3065 * Returns TRUE if retry is needed.
1c79356b 3066 */
55e303ae 3067static boolean_t
91447636 3068runq_scan(
1c79356b
A
3069 run_queue_t runq)
3070{
91447636 3071 register int count;
1c79356b
A
3072 register queue_t q;
3073 register thread_t thread;
1c79356b 3074
1c79356b
A
3075 if ((count = runq->count) > 0) {
3076 q = runq->queues + runq->highq;
3077 while (count > 0) {
3078 queue_iterate(q, thread, thread_t, links) {
55e303ae 3079 if ( thread->sched_stamp != sched_tick &&
0b4e3aa0 3080 (thread->sched_mode & TH_MODE_TIMESHARE) ) {
91447636 3081 if (thread_update_count == THREAD_UPDATE_SIZE)
55e303ae 3082 return (TRUE);
1c79356b 3083
91447636
A
3084 thread_update_array[thread_update_count++] = thread;
3085 thread_reference_internal(thread);
1c79356b
A
3086 }
3087
3088 count--;
3089 }
3090
3091 q--;
3092 }
3093 }
1c79356b 3094
91447636 3095 return (FALSE);
1c79356b
A
3096}
3097
55e303ae 3098static void
91447636 3099thread_update_scan(void)
1c79356b 3100{
2d21ac55
A
3101 boolean_t restart_needed = FALSE;
3102 processor_t processor = processor_list;
3103 processor_set_t pset;
3104 thread_t thread;
3105 spl_t s;
1c79356b 3106
1c79356b 3107 do {
2d21ac55
A
3108 do {
3109 pset = processor->processor_set;
1c79356b 3110
2d21ac55
A
3111 s = splsched();
3112 pset_lock(pset);
0b4e3aa0 3113
2d21ac55
A
3114 restart_needed = runq_scan(&processor->runq);
3115
3116 pset_unlock(pset);
3117 splx(s);
3118
3119 if (restart_needed)
3120 break;
3121
3122 thread = processor->idle_thread;
3123 if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
3124 if (thread_update_count == THREAD_UPDATE_SIZE) {
3125 restart_needed = TRUE;
3126 break;
0b4e3aa0
A
3127 }
3128
2d21ac55
A
3129 thread_update_array[thread_update_count++] = thread;
3130 thread_reference_internal(thread);
1c79356b 3131 }
2d21ac55 3132 } while ((processor = processor->processor_list) != NULL);
1c79356b
A
3133
3134 /*
3135 * Ok, we now have a collection of candidates -- fix them.
3136 */
91447636
A
3137 while (thread_update_count > 0) {
3138 thread = thread_update_array[--thread_update_count];
3139 thread_update_array[thread_update_count] = THREAD_NULL;
55e303ae 3140
1c79356b
A
3141 s = splsched();
3142 thread_lock(thread);
55e303ae
A
3143 if ( !(thread->state & (TH_WAIT|TH_SUSP)) &&
3144 thread->sched_stamp != sched_tick )
3145 update_priority(thread);
1c79356b
A
3146 thread_unlock(thread);
3147 splx(s);
55e303ae 3148
91447636 3149 thread_deallocate(thread);
1c79356b 3150 }
1c79356b
A
3151 } while (restart_needed);
3152}
3153
3154/*
3155 * Just in case someone doesn't use the macro
3156 */
3157#undef thread_wakeup
3158void
3159thread_wakeup(
3160 event_t x);
3161
3162void
3163thread_wakeup(
3164 event_t x)
3165{
3166 thread_wakeup_with_result(x, THREAD_AWAKENED);
3167}
3168
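/*
 *	Usage sketch (hypothetical event): the counterpart of an
 *	assert_wait()/thread_block() sequence is a wakeup on the same
 *	event once the awaited condition holds, e.g.
 *
 *		thread_wakeup((event_t)&example_event);
 */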
91447636
A
3169boolean_t
3170preemption_enabled(void)
3171{
3172 return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
3173}
9bccf70c 3174
0b4e3aa0 3175#if DEBUG
0b4e3aa0 3176static boolean_t
1c79356b 3177thread_runnable(
0b4e3aa0 3178 thread_t thread)
1c79356b 3179{
0b4e3aa0 3180 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
1c79356b 3181}
1c79356b
A
3182#endif /* DEBUG */
3183
3184#if MACH_KDB
3185#include <ddb/db_output.h>
3186#define printf kdbprintf
1c79356b
A
3187void db_sched(void);
3188
3189void
3190db_sched(void)
3191{
3192 iprintf("Scheduling Statistics:\n");
3193 db_indent += 2;
3194 iprintf("Thread invocations: csw %d same %d\n",
3195 c_thread_invoke_csw, c_thread_invoke_same);
3196#if MACH_COUNTERS
3197 iprintf("Thread block: calls %d\n",
3198 c_thread_block_calls);
2d21ac55 3199 iprintf("Idle thread:\n\thandoff %d block %d\n",
1c79356b 3200 c_idle_thread_handoff,
2d21ac55 3201 c_idle_thread_block);
1c79356b
A
3202 iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
3203#endif /* MACH_COUNTERS */
3204 db_indent -= 2;
3205}
55e303ae
A
3206
3207#include <ddb/db_output.h>
3208void db_show_thread_log(void);
3209
3210void
3211db_show_thread_log(void)
3212{
3213}
1c79356b 3214#endif /* MACH_KDB */