[apple/xnu.git] / osfmk / kern / sched_prim.c (xnu-2422.90.20)
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: sched_prim.c
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1986
62 *
63 * Scheduling primitives
64 *
65 */
66
67 #include <debug.h>
68
69 #include <mach/mach_types.h>
70 #include <mach/machine.h>
71 #include <mach/policy.h>
72 #include <mach/sync_policy.h>
73 #include <mach/thread_act.h>
74
75 #include <machine/machine_routines.h>
76 #include <machine/sched_param.h>
77 #include <machine/machine_cpu.h>
78 #include <machine/machlimits.h>
79
80 #include <kern/kern_types.h>
81 #include <kern/clock.h>
82 #include <kern/counters.h>
83 #include <kern/cpu_number.h>
84 #include <kern/cpu_data.h>
85 #include <kern/debug.h>
86 #include <kern/lock.h>
87 #include <kern/macro_help.h>
88 #include <kern/machine.h>
89 #include <kern/misc_protos.h>
90 #include <kern/processor.h>
91 #include <kern/queue.h>
92 #include <kern/sched.h>
93 #include <kern/sched_prim.h>
94 #include <kern/syscall_subr.h>
95 #include <kern/task.h>
96 #include <kern/thread.h>
97 #include <kern/wait_queue.h>
98 #include <kern/ledger.h>
99 #include <kern/timer_queue.h>
100
101 #include <vm/pmap.h>
102 #include <vm/vm_kern.h>
103 #include <vm/vm_map.h>
104
105 #include <mach/sdt.h>
106
107 #include <sys/kdebug.h>
108
109 #include <kern/pms.h>
110
111 struct rt_queue rt_runq;
112 #define RT_RUNQ ((processor_t)-1)
113 decl_simple_lock_data(static,rt_lock);
114
115 #if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
116 static struct fairshare_queue fs_runq;
117 #define FS_RUNQ ((processor_t)-2)
118 decl_simple_lock_data(static,fs_lock);
119 #endif
120
121 #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
122 int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
123
124 #define DEFAULT_BG_PREEMPTION_RATE 400 /* (1/s) */
125 int default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
126
127 #define MAX_UNSAFE_QUANTA 800
128 int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
129
130 #define MAX_POLL_QUANTA 2
131 int max_poll_quanta = MAX_POLL_QUANTA;
132
133 #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
134 int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
135
136 uint64_t max_poll_computation;
137
138 uint64_t max_unsafe_computation;
139 uint64_t sched_safe_duration;
140
141 #if defined(CONFIG_SCHED_TRADITIONAL)
142
143 uint32_t std_quantum;
144 uint32_t min_std_quantum;
145 uint32_t bg_quantum;
146
147 uint32_t std_quantum_us;
148 uint32_t bg_quantum_us;
149
150 #endif /* CONFIG_SCHED_TRADITIONAL */
151
152 uint32_t thread_depress_time;
153 uint32_t default_timeshare_computation;
154 uint32_t default_timeshare_constraint;
155
156 uint32_t max_rt_quantum;
157 uint32_t min_rt_quantum;
158
159 #if defined(CONFIG_SCHED_TRADITIONAL)
160
161 unsigned sched_tick;
162 uint32_t sched_tick_interval;
163
164 uint32_t sched_pri_shift = INT8_MAX;
165 uint32_t sched_background_pri_shift = INT8_MAX;
166 uint32_t sched_combined_fgbg_pri_shift = INT8_MAX;
167 uint32_t sched_fixed_shift;
168 uint32_t sched_use_combined_fgbg_decay = 0;
169
170 uint32_t sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */
171
172 static boolean_t sched_traditional_use_pset_runqueue = FALSE;
173
174 /* Defaults for timer deadline profiling */
175 #define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT 2000000 /* Timers with deadlines <=
176 * 2ms */
177 #define TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT 5000000 /* Timers with deadlines
178 <= 5ms */
179
180 uint64_t timer_deadline_tracking_bin_1;
181 uint64_t timer_deadline_tracking_bin_2;
182
183 thread_t sched_maintenance_thread;
184
185 __attribute__((always_inline))
186 static inline run_queue_t runq_for_processor(processor_t processor)
187 {
188 if (sched_traditional_use_pset_runqueue)
189 return &processor->processor_set->pset_runq;
190 else
191 return &processor->runq;
192 }
193
194 __attribute__((always_inline))
195 static inline void runq_consider_incr_bound_count(processor_t processor, thread_t thread)
196 {
197 if (thread->bound_processor == PROCESSOR_NULL)
198 return;
199
200 assert(thread->bound_processor == processor);
201
202 if (sched_traditional_use_pset_runqueue)
203 processor->processor_set->pset_runq_bound_count++;
204
205 processor->runq_bound_count++;
206 }
207
208 __attribute__((always_inline))
209 static inline void runq_consider_decr_bound_count(processor_t processor, thread_t thread)
210 {
211 if (thread->bound_processor == PROCESSOR_NULL)
212 return;
213
214 assert(thread->bound_processor == processor);
215
216 if (sched_traditional_use_pset_runqueue)
217 processor->processor_set->pset_runq_bound_count--;
218
219 processor->runq_bound_count--;
220 }
221
222 #endif /* CONFIG_SCHED_TRADITIONAL */
223
224 uint64_t sched_one_second_interval;
225
226 uint32_t sched_run_count, sched_share_count, sched_background_count;
227 uint32_t sched_load_average, sched_mach_factor;
228
229 /* Forwards */
230
231 #if defined(CONFIG_SCHED_TRADITIONAL)
232
233 static void load_shift_init(void);
234 static void preempt_pri_init(void);
235
236 #endif /* CONFIG_SCHED_TRADITIONAL */
237
238 static thread_t thread_select(
239 thread_t thread,
240 processor_t processor);
241
242 #if CONFIG_SCHED_IDLE_IN_PLACE
243 static thread_t thread_select_idle(
244 thread_t thread,
245 processor_t processor);
246 #endif
247
248 thread_t processor_idle(
249 thread_t thread,
250 processor_t processor);
251
252 ast_t
253 csw_check_locked( processor_t processor,
254 processor_set_t pset);
255
256 #if defined(CONFIG_SCHED_TRADITIONAL)
257
258 static thread_t steal_thread(
259 processor_set_t pset);
260
261 static thread_t steal_thread_disabled(
262 processor_set_t pset) __attribute__((unused));
263
264
265 static thread_t steal_processor_thread(
266 processor_t processor);
267
268 static void thread_update_scan(void);
269
270 static void processor_setrun(
271 processor_t processor,
272 thread_t thread,
273 integer_t options);
274
275 static boolean_t
276 processor_enqueue(
277 processor_t processor,
278 thread_t thread,
279 integer_t options);
280
281 static boolean_t
282 processor_queue_remove(
283 processor_t processor,
284 thread_t thread);
285
286 static boolean_t processor_queue_empty(processor_t processor);
287
288 static boolean_t priority_is_urgent(int priority);
289
290 static ast_t processor_csw_check(processor_t processor);
291
292 static boolean_t processor_queue_has_priority(processor_t processor,
293 int priority,
294 boolean_t gte);
295
296 static boolean_t should_current_thread_rechoose_processor(processor_t processor);
297
298 static int sched_traditional_processor_runq_count(processor_t processor);
299
300 static boolean_t sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor);
301
302 static uint64_t sched_traditional_processor_runq_stats_count_sum(processor_t processor);
303
304 static uint64_t sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor);
305 #endif
306
307
308 #if defined(CONFIG_SCHED_TRADITIONAL)
309
310 static void
311 sched_traditional_init(void);
312
313 static void
314 sched_traditional_timebase_init(void);
315
316 static void
317 sched_traditional_processor_init(processor_t processor);
318
319 static void
320 sched_traditional_pset_init(processor_set_t pset);
321
322 static void
323 sched_traditional_with_pset_runqueue_init(void);
324
325 #endif
326
327 static void
328 sched_realtime_init(void);
329
330 static void
331 sched_realtime_timebase_init(void);
332
333 static void
334 sched_timer_deadline_tracking_init(void);
335
336 #if defined(CONFIG_SCHED_TRADITIONAL)
337 static void
338 sched_traditional_maintenance_continue(void);
339
340 static uint32_t
341 sched_traditional_initial_quantum_size(thread_t thread);
342
343 static sched_mode_t
344 sched_traditional_initial_thread_sched_mode(task_t parent_task);
345
346 static boolean_t
347 sched_traditional_supports_timeshare_mode(void);
348
349 static thread_t
350 sched_traditional_choose_thread(
351 processor_t processor,
352 int priority);
353
354 #endif
355
356 #if DEBUG
357 extern int debug_task;
358 #define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
359 #else
360 #define TLOG(a, fmt, args...) do {} while (0)
361 #endif
362
363 #if DEBUG
364 static
365 boolean_t thread_runnable(
366 thread_t thread);
367
368 #endif /*DEBUG*/
369
370 /*
371 * State machine
372 *
373 * states are combinations of:
374 * R running
375 * W waiting (or on wait queue)
376 * N non-interruptible
377 * O swapped out
378 * I being swapped in
379 *
380 * init          action
381 *               assert_wait   thread_block   clear_wait    swapout   swapin
382 *
383 * R             RW, RWN       R; setrun      -             -
384 * RN            RWN           RN; setrun     -             -
385 *
386 * RW                          W              R             -
387 * RWN                         WN             RN            -
388 *
389 * W                                          R; setrun     WO
390 * WN                                         RN; setrun    -
391 *
392 * RO                                         -             -         R
393 *
394 */
395
396 #if defined(CONFIG_SCHED_TRADITIONAL)
397 int8_t sched_load_shifts[NRQS];
398 int sched_preempt_pri[NRQBM];
399 #endif
400
401
402 #if defined(CONFIG_SCHED_TRADITIONAL)
403
404 const struct sched_dispatch_table sched_traditional_dispatch = {
405 sched_traditional_init,
406 sched_traditional_timebase_init,
407 sched_traditional_processor_init,
408 sched_traditional_pset_init,
409 sched_traditional_maintenance_continue,
410 sched_traditional_choose_thread,
411 steal_thread,
412 compute_priority,
413 choose_processor,
414 processor_enqueue,
415 processor_queue_shutdown,
416 processor_queue_remove,
417 processor_queue_empty,
418 priority_is_urgent,
419 processor_csw_check,
420 processor_queue_has_priority,
421 sched_traditional_initial_quantum_size,
422 sched_traditional_initial_thread_sched_mode,
423 sched_traditional_supports_timeshare_mode,
424 can_update_priority,
425 update_priority,
426 lightweight_update_priority,
427 sched_traditional_quantum_expire,
428 should_current_thread_rechoose_processor,
429 sched_traditional_processor_runq_count,
430 sched_traditional_processor_runq_stats_count_sum,
431 sched_traditional_fairshare_init,
432 sched_traditional_fairshare_runq_count,
433 sched_traditional_fairshare_runq_stats_count_sum,
434 sched_traditional_fairshare_enqueue,
435 sched_traditional_fairshare_dequeue,
436 sched_traditional_fairshare_queue_remove,
437 TRUE /* direct_dispatch_to_idle_processors */
438 };
439
440 const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = {
441 sched_traditional_with_pset_runqueue_init,
442 sched_traditional_timebase_init,
443 sched_traditional_processor_init,
444 sched_traditional_pset_init,
445 sched_traditional_maintenance_continue,
446 sched_traditional_choose_thread,
447 steal_thread,
448 compute_priority,
449 choose_processor,
450 processor_enqueue,
451 processor_queue_shutdown,
452 processor_queue_remove,
453 sched_traditional_with_pset_runqueue_processor_queue_empty,
454 priority_is_urgent,
455 processor_csw_check,
456 processor_queue_has_priority,
457 sched_traditional_initial_quantum_size,
458 sched_traditional_initial_thread_sched_mode,
459 sched_traditional_supports_timeshare_mode,
460 can_update_priority,
461 update_priority,
462 lightweight_update_priority,
463 sched_traditional_quantum_expire,
464 should_current_thread_rechoose_processor,
465 sched_traditional_processor_runq_count,
466 sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum,
467 sched_traditional_fairshare_init,
468 sched_traditional_fairshare_runq_count,
469 sched_traditional_fairshare_runq_stats_count_sum,
470 sched_traditional_fairshare_enqueue,
471 sched_traditional_fairshare_dequeue,
472 sched_traditional_fairshare_queue_remove,
473 FALSE /* direct_dispatch_to_idle_processors */
474 };
475
476 #endif
477
478 const struct sched_dispatch_table *sched_current_dispatch = NULL;
479
480 /*
481 * Statically allocate a buffer to hold the longest possible
482 * scheduler description string, as currently implemented.
483 * bsd/kern/kern_sysctl.c has a corresponding definition in bsd/
484 * to export to userspace via sysctl(3). If either version
485 * changes, update the other.
486 *
487 * Note that in addition to being an upper bound on the strings
488 * in the kernel, it's also an exact parameter to PE_get_default(),
489 * which interrogates the device tree on some platforms. That
490 * API requires the caller know the exact size of the device tree
491 * property, so we need both a legacy size (32) and the current size
492 * (48) to deal with old and new device trees. The device tree property
493 * is similarly padded to a fixed size so that the same kernel image
494 * can run on multiple devices with different schedulers configured
495 * in the device tree.
496 */
497 #define SCHED_STRING_MAX_LENGTH (48)
498
499 char sched_string[SCHED_STRING_MAX_LENGTH];
500 static enum sched_enum _sched_enum __attribute__((used)) = sched_enum_unknown;
501
502 /* Global flag which indicates whether Background Stepper Context is enabled */
503 static int cpu_throttle_enabled = 1;
504
505 void
506 sched_init(void)
507 {
508 char sched_arg[SCHED_STRING_MAX_LENGTH] = { '\0' };
509
510 /* Check for runtime selection of the scheduler algorithm */
511 if (!PE_parse_boot_argn("sched", sched_arg, sizeof (sched_arg))) {
512 /* If no boot-args override, look in device tree */
513 if (!PE_get_default("kern.sched", sched_arg,
514 SCHED_STRING_MAX_LENGTH)) {
515 sched_arg[0] = '\0';
516 }
517 }
518
519 if (strlen(sched_arg) > 0) {
520 if (0) {
521 /* Allow pattern below */
522 #if defined(CONFIG_SCHED_TRADITIONAL)
523 } else if (0 == strcmp(sched_arg, kSchedTraditionalString)) {
524 sched_current_dispatch = &sched_traditional_dispatch;
525 _sched_enum = sched_enum_traditional;
526 strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string));
527 kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalString);
528 } else if (0 == strcmp(sched_arg, kSchedTraditionalWithPsetRunqueueString)) {
529 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
530 _sched_enum = sched_enum_traditional_with_pset_runqueue;
531 strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
532 kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalWithPsetRunqueueString);
533 #endif
534 #if defined(CONFIG_SCHED_PROTO)
535 } else if (0 == strcmp(sched_arg, kSchedProtoString)) {
536 sched_current_dispatch = &sched_proto_dispatch;
537 _sched_enum = sched_enum_proto;
538 strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
539 kprintf("Scheduler: Runtime selection of %s\n", kSchedProtoString);
540 #endif
541 #if defined(CONFIG_SCHED_GRRR)
542 } else if (0 == strcmp(sched_arg, kSchedGRRRString)) {
543 sched_current_dispatch = &sched_grrr_dispatch;
544 _sched_enum = sched_enum_grrr;
545 strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
546 kprintf("Scheduler: Runtime selection of %s\n", kSchedGRRRString);
547 #endif
548 #if defined(CONFIG_SCHED_FIXEDPRIORITY)
549 } else if (0 == strcmp(sched_arg, kSchedFixedPriorityString)) {
550 sched_current_dispatch = &sched_fixedpriority_dispatch;
551 _sched_enum = sched_enum_fixedpriority;
552 strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string));
553 kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityString);
554 } else if (0 == strcmp(sched_arg, kSchedFixedPriorityWithPsetRunqueueString)) {
555 sched_current_dispatch = &sched_fixedpriority_with_pset_runqueue_dispatch;
556 _sched_enum = sched_enum_fixedpriority_with_pset_runqueue;
557 strlcpy(sched_string, kSchedFixedPriorityWithPsetRunqueueString, sizeof(sched_string));
558 kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityWithPsetRunqueueString);
559 #endif
560 } else {
561 panic("Unrecognized scheduler algorithm: %s", sched_arg);
562 }
563 } else {
564 #if defined(CONFIG_SCHED_TRADITIONAL)
565 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
566 _sched_enum = sched_enum_traditional_with_pset_runqueue;
567 strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
568 kprintf("Scheduler: Default of %s\n", kSchedTraditionalWithPsetRunqueueString);
569 #elif defined(CONFIG_SCHED_PROTO)
570 sched_current_dispatch = &sched_proto_dispatch;
571 _sched_enum = sched_enum_proto;
572 strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
573 kprintf("Scheduler: Default of %s\n", kSchedProtoString);
574 #elif defined(CONFIG_SCHED_GRRR)
575 sched_current_dispatch = &sched_grrr_dispatch;
576 _sched_enum = sched_enum_grrr;
577 strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
578 kprintf("Scheduler: Default of %s\n", kSchedGRRRString);
579 #elif defined(CONFIG_SCHED_FIXEDPRIORITY)
580 sched_current_dispatch = &sched_fixedpriority_dispatch;
581 _sched_enum = sched_enum_fixedpriority;
582 strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string));
583 kprintf("Scheduler: Default of %s\n", kSchedFixedPriorityString);
584 #else
585 #error No default scheduler implementation
586 #endif
587 }
588
589 SCHED(init)();
590 SCHED(fairshare_init)();
591 sched_realtime_init();
592 ast_init();
593 sched_timer_deadline_tracking_init();
594
595 SCHED(pset_init)(&pset0);
596 SCHED(processor_init)(master_processor);
597 }
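/*
 * Usage example: booting with a "sched=<name>" boot-arg, where <name>
 * matches one of the kSched*String constants compared above, selects the
 * corresponding dispatch table; the same string may instead be supplied
 * through the "kern.sched" device tree property when no boot-arg is given.
 * Any other value causes sched_init() to panic.
 */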
598
599 void
600 sched_timebase_init(void)
601 {
602 uint64_t abstime;
603
604 clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC, &abstime);
605 sched_one_second_interval = abstime;
606
607 SCHED(timebase_init)();
608 sched_realtime_timebase_init();
609 }
610
611 #if defined(CONFIG_SCHED_TRADITIONAL)
612
613 static void
614 sched_traditional_init(void)
615 {
616 /*
617 * Calculate the timeslicing quantum
618 * in us.
619 */
620 if (default_preemption_rate < 1)
621 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
622 std_quantum_us = (1000 * 1000) / default_preemption_rate;
623
624 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
625
626 if (default_bg_preemption_rate < 1)
627 default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
628 bg_quantum_us = (1000 * 1000) / default_bg_preemption_rate;
629
630 printf("standard background quantum is %d us\n", bg_quantum_us);
631
632 load_shift_init();
633 preempt_pri_init();
634 sched_tick = 0;
635 }
636
637 static void
638 sched_traditional_timebase_init(void)
639 {
640 uint64_t abstime;
641 uint32_t shift;
642
643 /* standard timeslicing quantum */
644 clock_interval_to_absolutetime_interval(
645 std_quantum_us, NSEC_PER_USEC, &abstime);
646 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
647 std_quantum = (uint32_t)abstime;
648
649 /* smallest remaining quantum (250 us) */
650 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
651 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
652 min_std_quantum = (uint32_t)abstime;
653
654 /* quantum for background tasks */
655 clock_interval_to_absolutetime_interval(
656 bg_quantum_us, NSEC_PER_USEC, &abstime);
657 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
658 bg_quantum = (uint32_t)abstime;
659
660 /* scheduler tick interval */
661 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
662 NSEC_PER_USEC, &abstime);
663 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
664 sched_tick_interval = (uint32_t)abstime;
665
666 /*
667 * Compute conversion factor from usage to
668 * timesharing priorities with 5/8 ** n aging.
669 */
670 abstime = (abstime * 5) / 3;
671 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
672 abstime >>= 1;
673 sched_fixed_shift = shift;
674
675 max_unsafe_computation = max_unsafe_quanta * std_quantum;
676 sched_safe_duration = 2 * max_unsafe_quanta * std_quantum;
677
678 max_poll_computation = max_poll_quanta * std_quantum;
679 thread_depress_time = 1 * std_quantum;
680 default_timeshare_computation = std_quantum / 2;
681 default_timeshare_constraint = std_quantum;
682
683 }
684
685 static void
686 sched_traditional_processor_init(processor_t processor)
687 {
688 if (!sched_traditional_use_pset_runqueue) {
689 run_queue_init(&processor->runq);
690 }
691 processor->runq_bound_count = 0;
692 }
693
694 static void
695 sched_traditional_pset_init(processor_set_t pset)
696 {
697 if (sched_traditional_use_pset_runqueue) {
698 run_queue_init(&pset->pset_runq);
699 }
700 pset->pset_runq_bound_count = 0;
701 }
702
703 static void
704 sched_traditional_with_pset_runqueue_init(void)
705 {
706 sched_traditional_init();
707 sched_traditional_use_pset_runqueue = TRUE;
708 }
709
710 #endif /* CONFIG_SCHED_TRADITIONAL */
711
712 #if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
713 void
714 sched_traditional_fairshare_init(void)
715 {
716 simple_lock_init(&fs_lock, 0);
717
718 fs_runq.count = 0;
719 queue_init(&fs_runq.queue);
720 }
721 #endif
722
723 static void
724 sched_realtime_init(void)
725 {
726 simple_lock_init(&rt_lock, 0);
727
728 rt_runq.count = 0;
729 queue_init(&rt_runq.queue);
730 }
731
732 static void
733 sched_realtime_timebase_init(void)
734 {
735 uint64_t abstime;
736
737 /* smallest rt computation (50 us) */
738 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
739 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
740 min_rt_quantum = (uint32_t)abstime;
741
742 /* maximum rt computation (50 ms) */
743 clock_interval_to_absolutetime_interval(
744 50, 1000*NSEC_PER_USEC, &abstime);
745 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
746 max_rt_quantum = (uint32_t)abstime;
747
748 }
749
750 #if defined(CONFIG_SCHED_TRADITIONAL)
751
752 /*
753 * Set up values for timeshare
754 * loading factors.
755 */
756 static void
757 load_shift_init(void)
758 {
759 int8_t k, *p = sched_load_shifts;
760 uint32_t i, j;
761
762 uint32_t sched_decay_penalty = 1;
763
764 if (PE_parse_boot_argn("sched_decay_penalty", &sched_decay_penalty, sizeof (sched_decay_penalty))) {
765 kprintf("Overriding scheduler decay penalty %u\n", sched_decay_penalty);
766 }
767
768 if (PE_parse_boot_argn("sched_decay_usage_age_factor", &sched_decay_usage_age_factor, sizeof (sched_decay_usage_age_factor))) {
769 kprintf("Overriding scheduler decay usage age factor %u\n", sched_decay_usage_age_factor);
770 }
771
772 if (PE_parse_boot_argn("sched_use_combined_fgbg_decay", &sched_use_combined_fgbg_decay, sizeof (sched_use_combined_fgbg_decay))) {
773 kprintf("Overriding schedule fg/bg decay calculation: %u\n", sched_use_combined_fgbg_decay);
774 }
775
776 if (sched_decay_penalty == 0) {
777 /*
778 * There is no penalty for timeshare threads for using too much
779 * CPU, so set all load shifts to INT8_MIN. Even under high load,
780 * sched_pri_shift will be >INT8_MAX, and there will be no
781 * penalty applied to threads (nor will sched_usage be updated per
782 * thread).
783 */
784 for (i = 0; i < NRQS; i++) {
785 sched_load_shifts[i] = INT8_MIN;
786 }
787
788 return;
789 }
790
791 *p++ = INT8_MIN; *p++ = 0;
792
793 /*
794 * For a given system load "i", the per-thread priority
795 * penalty per quantum of CPU usage is ~2^k priority
796 * levels. "sched_decay_penalty" can cause more
797 * array entries to be filled with smaller "k" values
798 */
799 for (i = 2, j = 1 << sched_decay_penalty, k = 1; i < NRQS; ++k) {
800 for (j <<= 1; (i < j) && (i < NRQS); ++i)
801 *p++ = k;
802 }
803 }
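/*
 * Worked example (assuming the default sched_decay_penalty of 1): the loop
 * above fills sched_load_shifts[i] with k = floor(log2(i)) for i >= 2,
 * i.e. loads 2-3 map to a shift of 1, 4-7 to 2, 8-15 to 3, and so on,
 * producing the ~2^k priority-level penalty per quantum described above.
 */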
804
805 static void
806 preempt_pri_init(void)
807 {
808 int i, *p = sched_preempt_pri;
809
810 for (i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i)
811 setbit(i, p);
812
813 for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
814 setbit(i, p);
815 }
816
817 #endif /* CONFIG_SCHED_TRADITIONAL */
818
819 /*
820 * Thread wait timer expiration.
821 */
822 void
823 thread_timer_expire(
824 void *p0,
825 __unused void *p1)
826 {
827 thread_t thread = p0;
828 spl_t s;
829
830 s = splsched();
831 thread_lock(thread);
832 if (--thread->wait_timer_active == 0) {
833 if (thread->wait_timer_is_set) {
834 thread->wait_timer_is_set = FALSE;
835 clear_wait_internal(thread, THREAD_TIMED_OUT);
836 }
837 }
838 thread_unlock(thread);
839 splx(s);
840 }
841
842 /*
843 * thread_unblock:
844 *
845 * Unblock thread on wake up.
846 *
847 * Returns TRUE if the thread is still running.
848 *
849 * Thread must be locked.
850 */
851 boolean_t
852 thread_unblock(
853 thread_t thread,
854 wait_result_t wresult)
855 {
856 boolean_t result = FALSE;
857 thread_t cthread = current_thread();
858
859 /*
860 * Set wait_result.
861 */
862 thread->wait_result = wresult;
863
864 /*
865 * Cancel pending wait timer.
866 */
867 if (thread->wait_timer_is_set) {
868 if (timer_call_cancel(&thread->wait_timer))
869 thread->wait_timer_active--;
870 thread->wait_timer_is_set = FALSE;
871 }
872
873 /*
874 * Update scheduling state: not waiting,
875 * set running.
876 */
877 thread->state &= ~(TH_WAIT|TH_UNINT);
878
879 if (!(thread->state & TH_RUN)) {
880 thread->state |= TH_RUN;
881
882 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
883
884 /*
885 * Update run counts.
886 */
887 sched_run_incr();
888 if (thread->sched_mode == TH_MODE_TIMESHARE) {
889 sched_share_incr();
890
891 if (thread->max_priority <= MAXPRI_THROTTLE)
892 sched_background_incr();
893 }
894 }
895 else {
896 /*
897 * Signal if idling on another processor.
898 */
899 #if CONFIG_SCHED_IDLE_IN_PLACE
900 if (thread->state & TH_IDLE) {
901 processor_t processor = thread->last_processor;
902
903 if (processor != current_processor())
904 machine_signal_idle(processor);
905 }
906 #else
907 assert((thread->state & TH_IDLE) == 0);
908 #endif
909
910 result = TRUE;
911 }
912
913 /*
914 * Calculate deadline for real-time threads.
915 */
916 if (thread->sched_mode == TH_MODE_REALTIME) {
917 thread->realtime.deadline = thread->realtime.constraint + mach_absolute_time();
918 }
919
920 /*
921 * Clear old quantum, fail-safe computation, etc.
922 */
923 thread->current_quantum = 0;
924 thread->computation_metered = 0;
925 thread->reason = AST_NONE;
926
927 /* Obtain power-relevant interrupt and "platform-idle exit" statistics.
928 * We also account for "double hop" thread signaling via
929 * the thread callout infrastructure.
930 * DRK: consider removing the callout wakeup counters in the future;
931 * they're present for verification at the moment.
932 */
933 boolean_t aticontext, pidle;
934 ml_get_power_state(&aticontext, &pidle);
935
936 if (__improbable(aticontext && !(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT))) {
937 ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1);
938 DTRACE_SCHED2(iwakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
939
940 uint64_t ttd = PROCESSOR_DATA(current_processor(), timer_call_ttd);
941
942 if (ttd) {
943 if (ttd <= timer_deadline_tracking_bin_1)
944 thread->thread_timer_wakeups_bin_1++;
945 else
946 if (ttd <= timer_deadline_tracking_bin_2)
947 thread->thread_timer_wakeups_bin_2++;
948 }
949
950 if (pidle) {
951 ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1);
952 }
953
954 } else if (thread_get_tag_internal(cthread) & THREAD_TAG_CALLOUT) {
955 if (cthread->callout_woken_from_icontext) {
956 ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1);
957 thread->thread_callout_interrupt_wakeups++;
958 if (cthread->callout_woken_from_platform_idle) {
959 ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1);
960 thread->thread_callout_platform_idle_wakeups++;
961 }
962
963 cthread->callout_woke_thread = TRUE;
964 }
965 }
966
967 if (thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT) {
968 thread->callout_woken_from_icontext = aticontext;
969 thread->callout_woken_from_platform_idle = pidle;
970 thread->callout_woke_thread = FALSE;
971 }
972
973 /* Event should only be triggered if thread is not already running */
974 if (result == FALSE) {
975 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
976 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
977 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, 0, 0);
978 }
979
980 DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
981
982 return (result);
983 }
984
985 /*
986 * Routine: thread_go
987 * Purpose:
988 * Unblock and dispatch thread.
989 * Conditions:
990 * thread lock held, IPC locks may be held.
991 * thread must have been pulled from wait queue under same lock hold.
992 * Returns:
993 * KERN_SUCCESS - Thread was set running
994 * KERN_NOT_WAITING - Thread was not waiting
995 */
996 kern_return_t
997 thread_go(
998 thread_t thread,
999 wait_result_t wresult)
1000 {
1001 assert(thread->at_safe_point == FALSE);
1002 assert(thread->wait_event == NO_EVENT64);
1003 assert(thread->wait_queue == WAIT_QUEUE_NULL);
1004
1005 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
1006 if (!thread_unblock(thread, wresult))
1007 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1008
1009 return (KERN_SUCCESS);
1010 }
1011
1012 return (KERN_NOT_WAITING);
1013 }
1014
1015 /*
1016 * Routine: thread_mark_wait_locked
1017 * Purpose:
1018 * Mark a thread as waiting. If, given the circumstances,
1019 * it doesn't want to wait (i.e. already aborted), then
1020 * indicate that in the return value.
1021 * Conditions:
1022 * at splsched() and thread is locked.
1023 */
1024 __private_extern__
1025 wait_result_t
1026 thread_mark_wait_locked(
1027 thread_t thread,
1028 wait_interrupt_t interruptible)
1029 {
1030 boolean_t at_safe_point;
1031
1032 assert(thread == current_thread());
1033
1034 /*
1035 * The thread may have certain types of interrupts/aborts masked
1036 * off. Even if the wait location says these types of interrupts
1037 * are OK, we have to honor mask settings (outer-scoped code may
1038 * not be able to handle aborts at the moment).
1039 */
1040 if (interruptible > (thread->options & TH_OPT_INTMASK))
1041 interruptible = thread->options & TH_OPT_INTMASK;
1042
1043 at_safe_point = (interruptible == THREAD_ABORTSAFE);
1044
1045 if ( interruptible == THREAD_UNINT ||
1046 !(thread->sched_flags & TH_SFLAG_ABORT) ||
1047 (!at_safe_point &&
1048 (thread->sched_flags & TH_SFLAG_ABORTSAFELY))) {
1049
1050 if ( !(thread->state & TH_TERMINATE))
1051 DTRACE_SCHED(sleep);
1052
1053 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
1054 thread->at_safe_point = at_safe_point;
1055 return (thread->wait_result = THREAD_WAITING);
1056 }
1057 else
1058 if (thread->sched_flags & TH_SFLAG_ABORTSAFELY)
1059 thread->sched_flags &= ~TH_SFLAG_ABORTED_MASK;
1060
1061 return (thread->wait_result = THREAD_INTERRUPTED);
1062 }
1063
1064 /*
1065 * Routine: thread_interrupt_level
1066 * Purpose:
1067 * Set the maximum interruptible state for the
1068 * current thread. The effective value of any
1069 * interruptible flag passed into assert_wait
1070 * will never exceed this.
1071 *
1072 * Useful for code that must not be interrupted,
1073 * but which calls code that doesn't know that.
1074 * Returns:
1075 * The old interrupt level for the thread.
1076 */
1077 __private_extern__
1078 wait_interrupt_t
1079 thread_interrupt_level(
1080 wait_interrupt_t new_level)
1081 {
1082 thread_t thread = current_thread();
1083 wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
1084
1085 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
1086
1087 return result;
1088 }
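/*
 * Sketch of typical use: code that must not be interrupted raises the level
 * around the critical region and restores it afterwards, e.g.
 *
 *	wait_interrupt_t prev = thread_interrupt_level(THREAD_UNINT);
 *	... code that may indirectly call assert_wait() ...
 *	thread_interrupt_level(prev);
 *
 * Any interruptible argument passed to assert_wait() in between is then
 * clamped by thread_mark_wait_locked() to THREAD_UNINT.
 */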
1089
1090 /*
1091 * Check to see if an assert wait is possible, without actually doing one.
1092 * This is used by debug code in locks and elsewhere to verify that it is
1093 * always OK to block when trying to take a blocking lock (since waiting
1094 * for the actual assert_wait to catch the case may make it hard to detect
1095 * this case).
1096 */
1097 boolean_t
1098 assert_wait_possible(void)
1099 {
1100
1101 thread_t thread;
1102
1103 #if DEBUG
1104 if(debug_mode) return TRUE; /* Always succeed in debug mode */
1105 #endif
1106
1107 thread = current_thread();
1108
1109 return (thread == NULL || wait_queue_assert_possible(thread));
1110 }
1111
1112 /*
1113 * assert_wait:
1114 *
1115 * Assert that the current thread is about to go to
1116 * sleep until the specified event occurs.
1117 */
1118 wait_result_t
1119 assert_wait(
1120 event_t event,
1121 wait_interrupt_t interruptible)
1122 {
1123 register wait_queue_t wq;
1124 register int index;
1125
1126 assert(event != NO_EVENT);
1127
1128 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1129 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1130 VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0);
1131
1132 index = wait_hash(event);
1133 wq = &wait_queues[index];
1134 return wait_queue_assert_wait(wq, event, interruptible, 0);
1135 }
1136
1137 wait_result_t
1138 assert_wait_timeout(
1139 event_t event,
1140 wait_interrupt_t interruptible,
1141 uint32_t interval,
1142 uint32_t scale_factor)
1143 {
1144 thread_t thread = current_thread();
1145 wait_result_t wresult;
1146 wait_queue_t wqueue;
1147 uint64_t deadline;
1148 spl_t s;
1149
1150 assert(event != NO_EVENT);
1151 wqueue = &wait_queues[wait_hash(event)];
1152
1153 s = splsched();
1154 wait_queue_lock(wqueue);
1155 thread_lock(thread);
1156
1157 clock_interval_to_deadline(interval, scale_factor, &deadline);
1158
1159 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1160 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1161 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
1162
1163 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
1164 interruptible,
1165 TIMEOUT_URGENCY_SYS_NORMAL,
1166 deadline, 0,
1167 thread);
1168
1169 thread_unlock(thread);
1170 wait_queue_unlock(wqueue);
1171 splx(s);
1172
1173 return (wresult);
1174 }
1175
1176 wait_result_t
1177 assert_wait_timeout_with_leeway(
1178 event_t event,
1179 wait_interrupt_t interruptible,
1180 wait_timeout_urgency_t urgency,
1181 uint32_t interval,
1182 uint32_t leeway,
1183 uint32_t scale_factor)
1184 {
1185 thread_t thread = current_thread();
1186 wait_result_t wresult;
1187 wait_queue_t wqueue;
1188 uint64_t deadline;
1189 uint64_t abstime;
1190 uint64_t slop;
1191 uint64_t now;
1192 spl_t s;
1193
1194 now = mach_absolute_time();
1195 clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
1196 deadline = now + abstime;
1197
1198 clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop);
1199
1200 assert(event != NO_EVENT);
1201 wqueue = &wait_queues[wait_hash(event)];
1202
1203 s = splsched();
1204 wait_queue_lock(wqueue);
1205 thread_lock(thread);
1206
1207 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1208 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1209 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
1210
1211 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
1212 interruptible,
1213 urgency, deadline, slop,
1214 thread);
1215
1216 thread_unlock(thread);
1217 wait_queue_unlock(wqueue);
1218 splx(s);
1219
1220 return (wresult);
1221 }
1222
1223 wait_result_t
1224 assert_wait_deadline(
1225 event_t event,
1226 wait_interrupt_t interruptible,
1227 uint64_t deadline)
1228 {
1229 thread_t thread = current_thread();
1230 wait_result_t wresult;
1231 wait_queue_t wqueue;
1232 spl_t s;
1233
1234 assert(event != NO_EVENT);
1235 wqueue = &wait_queues[wait_hash(event)];
1236
1237 s = splsched();
1238 wait_queue_lock(wqueue);
1239 thread_lock(thread);
1240
1241 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1242 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1243 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
1244
1245 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
1246 interruptible,
1247 TIMEOUT_URGENCY_SYS_NORMAL, deadline, 0,
1248 thread);
1249
1250 thread_unlock(thread);
1251 wait_queue_unlock(wqueue);
1252 splx(s);
1253
1254 return (wresult);
1255 }
1256
1257 wait_result_t
1258 assert_wait_deadline_with_leeway(
1259 event_t event,
1260 wait_interrupt_t interruptible,
1261 wait_timeout_urgency_t urgency,
1262 uint64_t deadline,
1263 uint64_t leeway)
1264 {
1265 thread_t thread = current_thread();
1266 wait_result_t wresult;
1267 wait_queue_t wqueue;
1268 spl_t s;
1269
1270 assert(event != NO_EVENT);
1271 wqueue = &wait_queues[wait_hash(event)];
1272
1273 s = splsched();
1274 wait_queue_lock(wqueue);
1275 thread_lock(thread);
1276
1277 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1278 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1279 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
1280
1281 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
1282 interruptible,
1283 urgency, deadline, leeway,
1284 thread);
1285
1286 thread_unlock(thread);
1287 wait_queue_unlock(wqueue);
1288 splx(s);
1289
1290 return (wresult);
1291 }
1292
1293 /*
1294 * thread_sleep_fast_usimple_lock:
1295 *
1296 * Cause the current thread to wait until the specified event
1297 * occurs. The specified simple_lock is unlocked before releasing
1298 * the cpu and re-acquired as part of waking up.
1299 *
1300 * This is the simple lock sleep interface for components that use a
1301 * faster version of simple_lock() than is provided by usimple_lock().
1302 */
1303 __private_extern__ wait_result_t
1304 thread_sleep_fast_usimple_lock(
1305 event_t event,
1306 simple_lock_t lock,
1307 wait_interrupt_t interruptible)
1308 {
1309 wait_result_t res;
1310
1311 res = assert_wait(event, interruptible);
1312 if (res == THREAD_WAITING) {
1313 simple_unlock(lock);
1314 res = thread_block(THREAD_CONTINUE_NULL);
1315 simple_lock(lock);
1316 }
1317 return res;
1318 }
1319
1320
1321 /*
1322 * thread_sleep_usimple_lock:
1323 *
1324 * Cause the current thread to wait until the specified event
1325 * occurs. The specified usimple_lock is unlocked before releasing
1326 * the cpu and re-acquired as part of waking up.
1327 *
1328 * This is the simple lock sleep interface for components where
1329 * simple_lock() is defined in terms of usimple_lock().
1330 */
1331 wait_result_t
1332 thread_sleep_usimple_lock(
1333 event_t event,
1334 usimple_lock_t lock,
1335 wait_interrupt_t interruptible)
1336 {
1337 wait_result_t res;
1338
1339 res = assert_wait(event, interruptible);
1340 if (res == THREAD_WAITING) {
1341 usimple_unlock(lock);
1342 res = thread_block(THREAD_CONTINUE_NULL);
1343 usimple_lock(lock);
1344 }
1345 return res;
1346 }
1347
1348 /*
1349 * thread_sleep_lock_write:
1350 *
1351 * Cause the current thread to wait until the specified event
1352 * occurs. The specified (write) lock is unlocked before releasing
1353 * the cpu. The (write) lock will be re-acquired before returning.
1354 */
1355 wait_result_t
1356 thread_sleep_lock_write(
1357 event_t event,
1358 lock_t *lock,
1359 wait_interrupt_t interruptible)
1360 {
1361 wait_result_t res;
1362
1363 res = assert_wait(event, interruptible);
1364 if (res == THREAD_WAITING) {
1365 lock_write_done(lock);
1366 res = thread_block(THREAD_CONTINUE_NULL);
1367 lock_write(lock);
1368 }
1369 return res;
1370 }
1371
1372 /*
1373 * thread_isoncpu:
1374 *
1375 * Return TRUE if a thread is running on a processor such that an AST
1376 * is needed to pull it out of userspace execution, or if executing in
1377 * the kernel, bring to a context switch boundary that would cause
1378 * thread state to be serialized in the thread PCB.
1379 *
1380 * Thread locked, returns the same way. While locked, fields
1381 * like "state" and "runq" cannot change.
1382 */
1383 static inline boolean_t
1384 thread_isoncpu(thread_t thread)
1385 {
1386 /* Not running or runnable */
1387 if (!(thread->state & TH_RUN))
1388 return (FALSE);
1389
1390 /* Waiting on a runqueue, not currently running */
1391 if (thread->runq != PROCESSOR_NULL)
1392 return (FALSE);
1393
1394 /*
1395 * Thread must be running on a processor, or
1396 * about to run, or just did run. In all these
1397 * cases, an AST to the processor is needed
1398 * to guarantee that the thread is kicked out
1399 * of userspace and the processor has
1400 * context switched (and saved register state).
1401 */
1402 return (TRUE);
1403 }
1404
1405 /*
1406 * thread_stop:
1407 *
1408 * Force a preemption point for a thread and wait
1409 * for it to stop running on a CPU. If a stronger
1410 * guarantee is requested, wait until no longer
1411 * runnable. Arbitrates access among
1412 * multiple stop requests. (released by unstop)
1413 *
1414 * The thread must enter a wait state and stop via a
1415 * separate means.
1416 *
1417 * Returns FALSE if interrupted.
1418 */
1419 boolean_t
1420 thread_stop(
1421 thread_t thread,
1422 boolean_t until_not_runnable)
1423 {
1424 wait_result_t wresult;
1425 spl_t s = splsched();
1426 boolean_t oncpu;
1427
1428 wake_lock(thread);
1429 thread_lock(thread);
1430
1431 while (thread->state & TH_SUSP) {
1432 thread->wake_active = TRUE;
1433 thread_unlock(thread);
1434
1435 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
1436 wake_unlock(thread);
1437 splx(s);
1438
1439 if (wresult == THREAD_WAITING)
1440 wresult = thread_block(THREAD_CONTINUE_NULL);
1441
1442 if (wresult != THREAD_AWAKENED)
1443 return (FALSE);
1444
1445 s = splsched();
1446 wake_lock(thread);
1447 thread_lock(thread);
1448 }
1449
1450 thread->state |= TH_SUSP;
1451
1452 while ((oncpu = thread_isoncpu(thread)) ||
1453 (until_not_runnable && (thread->state & TH_RUN))) {
1454 processor_t processor;
1455
1456 if (oncpu) {
1457 assert(thread->state & TH_RUN);
1458 processor = thread->chosen_processor;
1459 cause_ast_check(processor);
1460 }
1461
1462 thread->wake_active = TRUE;
1463 thread_unlock(thread);
1464
1465 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
1466 wake_unlock(thread);
1467 splx(s);
1468
1469 if (wresult == THREAD_WAITING)
1470 wresult = thread_block(THREAD_CONTINUE_NULL);
1471
1472 if (wresult != THREAD_AWAKENED) {
1473 thread_unstop(thread);
1474 return (FALSE);
1475 }
1476
1477 s = splsched();
1478 wake_lock(thread);
1479 thread_lock(thread);
1480 }
1481
1482 thread_unlock(thread);
1483 wake_unlock(thread);
1484 splx(s);
1485
1486 /*
1487 * We return with the thread unlocked. To prevent it from
1488 * transitioning to a runnable state (or from TH_RUN to
1489 * being on the CPU), the caller must ensure the thread
1490 * is stopped via an external means (such as an AST)
1491 */
1492
1493 return (TRUE);
1494 }
1495
1496 /*
1497 * thread_unstop:
1498 *
1499 * Release a previous stop request and set
1500 * the thread running if appropriate.
1501 *
1502 * Use only after a successful stop operation.
1503 */
1504 void
1505 thread_unstop(
1506 thread_t thread)
1507 {
1508 spl_t s = splsched();
1509
1510 wake_lock(thread);
1511 thread_lock(thread);
1512
1513 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
1514 thread->state &= ~TH_SUSP;
1515 thread_unblock(thread, THREAD_AWAKENED);
1516
1517 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1518 }
1519 else
1520 if (thread->state & TH_SUSP) {
1521 thread->state &= ~TH_SUSP;
1522
1523 if (thread->wake_active) {
1524 thread->wake_active = FALSE;
1525 thread_unlock(thread);
1526
1527 thread_wakeup(&thread->wake_active);
1528 wake_unlock(thread);
1529 splx(s);
1530
1531 return;
1532 }
1533 }
1534
1535 thread_unlock(thread);
1536 wake_unlock(thread);
1537 splx(s);
1538 }
1539
1540 /*
1541 * thread_wait:
1542 *
1543 * Wait for a thread to stop running. (non-interruptible)
1544 *
1545 */
1546 void
1547 thread_wait(
1548 thread_t thread,
1549 boolean_t until_not_runnable)
1550 {
1551 wait_result_t wresult;
1552 boolean_t oncpu;
1553 processor_t processor;
1554 spl_t s = splsched();
1555
1556 wake_lock(thread);
1557 thread_lock(thread);
1558
1559 /*
1560 * Wait until not running on a CPU. If stronger requirement
1561 * desired, wait until not runnable. Assumption: if thread is
1562 * on CPU, then TH_RUN is set, so we're not waiting in any case
1563 * where the original, pure "TH_RUN" check would have let us
1564 * finish.
1565 */
1566 while ((oncpu = thread_isoncpu(thread)) ||
1567 (until_not_runnable && (thread->state & TH_RUN))) {
1568
1569 if (oncpu) {
1570 assert(thread->state & TH_RUN);
1571 processor = thread->chosen_processor;
1572 cause_ast_check(processor);
1573 }
1574
1575 thread->wake_active = TRUE;
1576 thread_unlock(thread);
1577
1578 wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
1579 wake_unlock(thread);
1580 splx(s);
1581
1582 if (wresult == THREAD_WAITING)
1583 thread_block(THREAD_CONTINUE_NULL);
1584
1585 s = splsched();
1586 wake_lock(thread);
1587 thread_lock(thread);
1588 }
1589
1590 thread_unlock(thread);
1591 wake_unlock(thread);
1592 splx(s);
1593 }
1594
1595 /*
1596 * Routine: clear_wait_internal
1597 *
1598 * Clear the wait condition for the specified thread.
1599 * Start the thread executing if that is appropriate.
1600 * Arguments:
1601 * thread thread to awaken
1602 * result Wakeup result the thread should see
1603 * Conditions:
1604 * At splsched
1605 * the thread is locked.
1606 * Returns:
1607 * KERN_SUCCESS thread was rousted out a wait
1608 * KERN_FAILURE thread was waiting but could not be rousted
1609 * KERN_NOT_WAITING thread was not waiting
1610 */
1611 __private_extern__ kern_return_t
1612 clear_wait_internal(
1613 thread_t thread,
1614 wait_result_t wresult)
1615 {
1616 wait_queue_t wq = thread->wait_queue;
1617 uint32_t i = LockTimeOut;
1618
1619 do {
1620 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1621 return (KERN_FAILURE);
1622
1623 if (wq != WAIT_QUEUE_NULL) {
1624 if (wait_queue_lock_try(wq)) {
1625 wait_queue_pull_thread_locked(wq, thread, TRUE);
1626 /* wait queue unlocked, thread still locked */
1627 }
1628 else {
1629 thread_unlock(thread);
1630 delay(1);
1631
1632 thread_lock(thread);
1633 if (wq != thread->wait_queue)
1634 return (KERN_NOT_WAITING);
1635
1636 continue;
1637 }
1638 }
1639
1640 return (thread_go(thread, wresult));
1641 } while ((--i > 0) || machine_timeout_suspended());
1642
1643 panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
1644 thread, wq, cpu_number());
1645
1646 return (KERN_FAILURE);
1647 }
1648
1649
1650 /*
1651 * clear_wait:
1652 *
1653 * Clear the wait condition for the specified thread. Start the thread
1654 * executing if that is appropriate.
1655 *
1656 * parameters:
1657 * thread thread to awaken
1658 * result Wakeup result the thread should see
1659 */
1660 kern_return_t
1661 clear_wait(
1662 thread_t thread,
1663 wait_result_t result)
1664 {
1665 kern_return_t ret;
1666 spl_t s;
1667
1668 s = splsched();
1669 thread_lock(thread);
1670 ret = clear_wait_internal(thread, result);
1671 thread_unlock(thread);
1672 splx(s);
1673 return ret;
1674 }
1675
1676
1677 /*
1678 * thread_wakeup_prim:
1679 *
1680 * Common routine for thread_wakeup, thread_wakeup_with_result,
1681 * and thread_wakeup_one.
1682 *
1683 */
1684 kern_return_t
1685 thread_wakeup_prim(
1686 event_t event,
1687 boolean_t one_thread,
1688 wait_result_t result)
1689 {
1690 return (thread_wakeup_prim_internal(event, one_thread, result, -1));
1691 }
1692
1693
1694 kern_return_t
1695 thread_wakeup_prim_internal(
1696 event_t event,
1697 boolean_t one_thread,
1698 wait_result_t result,
1699 int priority)
1700 {
1701 register wait_queue_t wq;
1702 register int index;
1703
1704 index = wait_hash(event);
1705 wq = &wait_queues[index];
1706 if (one_thread)
1707 return (wait_queue_wakeup_one(wq, event, result, priority));
1708 else
1709 return (wait_queue_wakeup_all(wq, event, result));
1710 }
1711
1712 /*
1713 * thread_bind:
1714 *
1715 * Force the current thread to execute on the specified processor.
1716 *
1717 * Returns the previous binding. PROCESSOR_NULL means
1718 * not bound.
1719 *
1720 * XXX - DO NOT export this to users - XXX
1721 */
1722 processor_t
1723 thread_bind(
1724 processor_t processor)
1725 {
1726 thread_t self = current_thread();
1727 processor_t prev;
1728 spl_t s;
1729
1730 s = splsched();
1731 thread_lock(self);
1732
1733 prev = self->bound_processor;
1734 self->bound_processor = processor;
1735
1736 thread_unlock(self);
1737 splx(s);
1738
1739 return (prev);
1740 }
1741
1742 /*
1743 * thread_select:
1744 *
1745 * Select a new thread for the current processor to execute.
1746 *
1747 * May select the current thread, which must be locked.
1748 */
1749 static thread_t
1750 thread_select(
1751 thread_t thread,
1752 processor_t processor)
1753 {
1754 processor_set_t pset = processor->processor_set;
1755 thread_t new_thread = THREAD_NULL;
1756 boolean_t inactive_state;
1757
1758 assert(processor == current_processor());
1759
1760 do {
1761 /*
1762 * Update the priority.
1763 */
1764 if (SCHED(can_update_priority)(thread))
1765 SCHED(update_priority)(thread);
1766
1767 processor->current_pri = thread->sched_pri;
1768 processor->current_thmode = thread->sched_mode;
1769
1770 pset_lock(pset);
1771
1772 assert(pset->low_count);
1773 assert(pset->low_pri);
1774
1775 if (processor->processor_meta != PROCESSOR_META_NULL && processor->processor_meta->primary != processor) {
1776 /*
1777 * Should this secondary SMT processor attempt to find work? For pset runqueue systems,
1778 * we should look for work only under the same conditions that choose_processor()
1779 * would have assigned work, which is when all primary processors have been assigned work.
1780 *
1781 * An exception is that bound threads are dispatched to a processor without going through
1782 * choose_processor(), so in those cases we should continue trying to dequeue work.
1783 */
1784 if (!processor->runq_bound_count && !queue_empty(&pset->idle_queue) && !rt_runq.count) {
1785 goto idle;
1786 }
1787 }
1788
1789 inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_processor_is_inactive(processor);
1790
1791 simple_lock(&rt_lock);
1792
1793 /*
1794 * Test to see if the current thread should continue
1795 * to run on this processor. Must be runnable, and not
1796 * bound to a different processor, nor be in the wrong
1797 * processor set.
1798 */
1799 if ( ((thread->state & ~TH_SUSP) == TH_RUN) &&
1800 (thread->sched_pri >= BASEPRI_RTQUEUES ||
1801 processor->processor_meta == PROCESSOR_META_NULL ||
1802 processor->processor_meta->primary == processor) &&
1803 (thread->bound_processor == PROCESSOR_NULL ||
1804 thread->bound_processor == processor) &&
1805 (thread->affinity_set == AFFINITY_SET_NULL ||
1806 thread->affinity_set->aset_pset == pset)) {
1807 if (thread->sched_pri >= BASEPRI_RTQUEUES &&
1808 first_timeslice(processor)) {
1809 if (rt_runq.count > 0) {
1810 register queue_t q;
1811
1812 q = &rt_runq.queue;
1813 if (((thread_t)q->next)->realtime.deadline <
1814 processor->deadline) {
1815 if ((((thread_t)q->next)->bound_processor == PROCESSOR_NULL) || (((thread_t)q->next)->bound_processor == processor)) {
1816 thread = (thread_t)dequeue_head(q);
1817 thread->runq = PROCESSOR_NULL;
1818 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
1819 rt_runq.count--;
1820 }
1821 }
1822 }
1823
1824 simple_unlock(&rt_lock);
1825
1826 processor->deadline = thread->realtime.deadline;
1827
1828 pset_unlock(pset);
1829
1830 return (thread);
1831 }
1832
1833 if (!inactive_state && (thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) &&
1834 (new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? MINPRI : thread->sched_pri)) == THREAD_NULL) {
1835
1836 simple_unlock(&rt_lock);
1837
1838 /* I am the highest priority runnable (non-idle) thread */
1839
1840 pset_pri_hint(pset, processor, processor->current_pri);
1841
1842 pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor));
1843
1844 processor->deadline = UINT64_MAX;
1845
1846 pset_unlock(pset);
1847
1848 return (thread);
1849 }
1850 }
1851
1852 if (new_thread != THREAD_NULL ||
1853 (SCHED(processor_queue_has_priority)(processor, rt_runq.count == 0 ? IDLEPRI : BASEPRI_RTQUEUES, TRUE) &&
1854 (new_thread = SCHED(choose_thread)(processor, MINPRI)) != THREAD_NULL)) {
1855 simple_unlock(&rt_lock);
1856
1857 if (!inactive_state) {
1858 pset_pri_hint(pset, processor, new_thread->sched_pri);
1859
1860 pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor));
1861 }
1862
1863 processor->deadline = UINT64_MAX;
1864 pset_unlock(pset);
1865
1866 return (new_thread);
1867 }
1868
1869 if (rt_runq.count > 0) {
1870 thread = (thread_t)dequeue_head(&rt_runq.queue);
1871
1872 if (__probable((thread->bound_processor == NULL || (thread->bound_processor == processor)))) {
1873 thread->runq = PROCESSOR_NULL;
1874 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
1875 rt_runq.count--;
1876
1877 simple_unlock(&rt_lock);
1878
1879 processor->deadline = thread->realtime.deadline;
1880 pset_unlock(pset);
1881
1882 return (thread);
1883 } else {
1884 enqueue_head(&rt_runq.queue, (queue_entry_t)thread);
1885 }
1886 }
1887
1888 simple_unlock(&rt_lock);
1889
1890 /* No realtime threads and no normal threads on the per-processor
1891 * runqueue. Finally check for global fairshare threads.
1892 */
1893 if ((new_thread = SCHED(fairshare_dequeue)()) != THREAD_NULL) {
1894
1895 processor->deadline = UINT64_MAX;
1896 pset_unlock(pset);
1897
1898 return (new_thread);
1899 }
1900
1901 processor->deadline = UINT64_MAX;
1902
1903 /*
1904 * Set processor inactive based on
1905 * indication from the platform code.
1906 */
1907 if (inactive_state) {
1908 if (processor->state == PROCESSOR_RUNNING)
1909 remqueue((queue_entry_t)processor);
1910 else
1911 if (processor->state == PROCESSOR_IDLE)
1912 remqueue((queue_entry_t)processor);
1913
1914 processor->state = PROCESSOR_INACTIVE;
1915
1916 pset_unlock(pset);
1917
1918 return (processor->idle_thread);
1919 }
1920
1921 /*
1922 * No runnable threads, attempt to steal
1923 * from other processors.
1924 */
1925 new_thread = SCHED(steal_thread)(pset);
1926 if (new_thread != THREAD_NULL) {
1927 return (new_thread);
1928 }
1929
1930 /*
1931 * If other threads have appeared, shortcut
1932 * around again.
1933 */
1934 if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0 || SCHED(fairshare_runq_count)() > 0)
1935 continue;
1936
1937 pset_lock(pset);
1938
1939 idle:
1940 /*
1941 * Nothing is runnable, so set this processor idle if it
1942 * was running.
1943 */
1944 if (processor->state == PROCESSOR_RUNNING) {
1945 remqueue((queue_entry_t)processor);
1946 processor->state = PROCESSOR_IDLE;
1947
1948 if (processor->processor_meta == PROCESSOR_META_NULL || processor->processor_meta->primary == processor) {
1949 enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
1950 pset_pri_init_hint(pset, processor);
1951 pset_count_init_hint(pset, processor);
1952 }
1953 else {
1954 enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor);
1955 pset_unlock(pset);
1956 return (processor->idle_thread);
1957 }
1958 }
1959
1960 pset_unlock(pset);
1961
1962 #if CONFIG_SCHED_IDLE_IN_PLACE
1963 /*
1964 * Choose idle thread if fast idle is not possible.
1965 */
1966 if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active || thread->sched_pri >= BASEPRI_RTQUEUES)
1967 return (processor->idle_thread);
1968
1969 /*
1970 * Perform idling activities directly without a
1971 * context switch. Return the dispatched thread,
1972 * otherwise check again for a runnable thread.
1973 */
1974 new_thread = thread_select_idle(thread, processor);
1975
1976 #else /* !CONFIG_SCHED_IDLE_IN_PLACE */
1977
1978 /*
1979 * Do a full context switch to idle so that the current
1980 * thread can start running on another processor without
1981 * waiting for the fast-idled processor to wake up.
1982 */
1983 return (processor->idle_thread);
1984
1985 #endif /* !CONFIG_SCHED_IDLE_IN_PLACE */
1986
1987 } while (new_thread == THREAD_NULL);
1988
1989 return (new_thread);
1990 }
1991
1992 #if CONFIG_SCHED_IDLE_IN_PLACE
1993 /*
1994 * thread_select_idle:
1995 *
1996 * Idle the processor using the current thread context.
1997 *
1998 * Called with thread locked, then dropped and relocked.
1999 */
2000 static thread_t
2001 thread_select_idle(
2002 thread_t thread,
2003 processor_t processor)
2004 {
2005 thread_t new_thread;
2006 uint64_t arg1, arg2;
2007 int urgency;
2008
2009 if (thread->sched_mode == TH_MODE_TIMESHARE) {
2010 if (thread->max_priority <= MAXPRI_THROTTLE)
2011 sched_background_decr();
2012
2013 sched_share_decr();
2014 }
2015 sched_run_decr();
2016
2017 thread->state |= TH_IDLE;
2018 processor->current_pri = IDLEPRI;
2019 processor->current_thmode = TH_MODE_NONE;
2020
2021 /* Reload precise timing global policy to thread-local policy */
2022 thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
2023
2024 thread_unlock(thread);
2025
2026 /*
2027 * Switch execution timing to processor idle thread.
2028 */
2029 processor->last_dispatch = mach_absolute_time();
2030 thread->last_run_time = processor->last_dispatch;
2031 thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
2032 PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;
2033
2034 /*
2035 * Cancel the quantum timer while idling.
2036 */
2037 timer_call_cancel(&processor->quantum_timer);
2038 processor->timeslice = 0;
2039
2040 (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
2041
2042 thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL);
2043
2044 /*
2045 * Enable interrupts and perform idling activities. No
2046 * preemption due to TH_IDLE being set.
2047 */
2048 spllo(); new_thread = processor_idle(thread, processor);
2049
2050 /*
2051 * Return at splsched.
2052 */
2053 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
2054
2055 thread_lock(thread);
2056
2057 /*
2058 * If awakened, switch to thread timer and start a new quantum.
2059 * Otherwise skip; we will context switch to another thread or return here.
2060 */
2061 if (!(thread->state & TH_WAIT)) {
2062 processor->last_dispatch = mach_absolute_time();
2063 thread_timer_event(processor->last_dispatch, &thread->system_timer);
2064 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
2065
2066 thread_quantum_init(thread);
2067 thread->last_quantum_refill_time = processor->last_dispatch;
2068
2069 processor->quantum_end = processor->last_dispatch + thread->current_quantum;
2070 timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL);
2071 processor->timeslice = 1;
2072
2073 thread->computation_epoch = processor->last_dispatch;
2074 }
2075
2076 thread->state &= ~TH_IDLE;
2077
2078 /*
2079 * If we idled in place, simulate a context switch back
2080 * to the original priority of the thread so that the
2081 * platform layer cannot distinguish this from a true
2082 * switch to the idle thread.
2083 */
2084
2085 urgency = thread_get_urgency(thread, &arg1, &arg2);
2086
2087 thread_tell_urgency(urgency, arg1, arg2, new_thread);
2088
2089 sched_run_incr();
2090 if (thread->sched_mode == TH_MODE_TIMESHARE) {
2091 sched_share_incr();
2092
2093 if (thread->max_priority <= MAXPRI_THROTTLE)
2094 sched_background_incr();
2095 }
2096
2097 return (new_thread);
2098 }
2099 #endif /* CONFIG_SCHED_IDLE_IN_PLACE */
2100
2101 #if defined(CONFIG_SCHED_TRADITIONAL)
2102 static thread_t
2103 sched_traditional_choose_thread(
2104 processor_t processor,
2105 int priority)
2106 {
2107 thread_t thread;
2108
2109 thread = choose_thread(processor, runq_for_processor(processor), priority);
2110 if (thread != THREAD_NULL) {
2111 runq_consider_decr_bound_count(processor, thread);
2112 }
2113
2114 return thread;
2115 }
2116
2117 #endif /* defined(CONFIG_SCHED_TRADITIONAL) */
2118
2119 #if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY)
2120
2121 /*
2122 * choose_thread:
2123 *
2124 * Locate a thread to execute from the processor run queue
2125 * and return it. Only choose a thread with priority greater
2126 * than or equal to the given priority.
2127 *
2128 * Associated pset must be locked. Returns THREAD_NULL
2129 * on failure.
2130 */
2131 thread_t
2132 choose_thread(
2133 processor_t processor,
2134 run_queue_t rq,
2135 int priority)
2136 {
2137 queue_t queue = rq->queues + rq->highq;
2138 int pri = rq->highq, count = rq->count;
2139 thread_t thread;
2140
2141 while (count > 0 && pri >= priority) {
2142 thread = (thread_t)queue_first(queue);
2143 while (!queue_end(queue, (queue_entry_t)thread)) {
2144 if (thread->bound_processor == PROCESSOR_NULL ||
2145 thread->bound_processor == processor) {
2146 remqueue((queue_entry_t)thread);
2147
2148 thread->runq = PROCESSOR_NULL;
2149 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
2150 rq->count--;
2151 if (SCHED(priority_is_urgent)(pri)) {
2152 rq->urgency--; assert(rq->urgency >= 0);
2153 }
2154 if (queue_empty(queue)) {
2155 if (pri != IDLEPRI)
2156 clrbit(MAXPRI - pri, rq->bitmap);
2157 rq->highq = MAXPRI - ffsbit(rq->bitmap);
2158 }
2159
2160 return (thread);
2161 }
2162 count--;
2163
2164 thread = (thread_t)queue_next((queue_entry_t)thread);
2165 }
2166
2167 queue--; pri--;
2168 }
2169
2170 return (THREAD_NULL);
2171 }
2172
2173 #endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) */
2174
2175 /*
2176 * Perform a context switch and start executing the new thread.
2177 *
2178 * Returns FALSE on failure, and the thread is re-dispatched.
2179 *
2180 * Called at splsched.
2181 */
2182
2183 #define funnel_release_check(thread, debug) \
2184 MACRO_BEGIN \
2185 if ((thread)->funnel_state & TH_FN_OWNED) { \
2186 (thread)->funnel_state = TH_FN_REFUNNEL; \
2187 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
2188 (thread)->funnel_lock, (debug), 0, 0, 0); \
2189 funnel_unlock((thread)->funnel_lock); \
2190 } \
2191 MACRO_END
2192
2193 #define funnel_refunnel_check(thread, debug) \
2194 MACRO_BEGIN \
2195 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
2196 kern_return_t result = (thread)->wait_result; \
2197 \
2198 (thread)->funnel_state = 0; \
2199 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
2200 (thread)->funnel_lock, (debug), 0, 0, 0); \
2201 funnel_lock((thread)->funnel_lock); \
2202 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
2203 (thread)->funnel_lock, (debug), 0, 0, 0); \
2204 (thread)->funnel_state = TH_FN_OWNED; \
2205 (thread)->wait_result = result; \
2206 } \
2207 MACRO_END
2208
2209 /*
2210 * thread_invoke
2211 *
2212 * "self" is what is currently running on the processor,
2213 * "thread" is the new thread to context switch to
2214 * (which may be the same thread in some cases)
2215 */
2216 static boolean_t
2217 thread_invoke(
2218 thread_t self,
2219 thread_t thread,
2220 ast_t reason)
2221 {
2222 thread_continue_t continuation = self->continuation;
2223 void *parameter = self->parameter;
2224 processor_t processor;
2225 uint64_t ctime = mach_absolute_time();
2226
2227 if (__improbable(get_preemption_level() != 0)) {
2228 int pl = get_preemption_level();
2229 panic("thread_invoke: preemption_level %d, possible cause: %s",
2230 pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" :
2231 "blocking while holding a spinlock, or within interrupt context"));
2232 }
2233
2234 assert(self == current_thread());
2235
2236 #if defined(CONFIG_SCHED_TRADITIONAL)
2237 sched_traditional_consider_maintenance(ctime);
2238 #endif /* CONFIG_SCHED_TRADITIONAL */
2239
2240 /*
2241 * Mark thread interruptible.
2242 */
2243 thread_lock(thread);
2244 thread->state &= ~TH_UNINT;
2245
2246 #if DEBUG
2247 assert(thread_runnable(thread));
2248 #endif
2249
2250 /* Reload precise timing global policy to thread-local policy */
2251 thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
2252
2253 /*
2254 * Allow time constraint threads to hang onto
2255 * a stack.
2256 */
2257 if ((self->sched_mode == TH_MODE_REALTIME) && !self->reserved_stack)
2258 self->reserved_stack = self->kernel_stack;
2259
2260 if (continuation != NULL) {
2261 if (!thread->kernel_stack) {
2262 /*
2263 * If we are using a privileged stack,
2264 * check to see whether we can exchange it with
2265 * that of the other thread.
2266 */
2267 if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack)
2268 goto need_stack;
2269
2270 /*
2271 * Context switch by performing a stack handoff.
2272 */
2273 continuation = thread->continuation;
2274 parameter = thread->parameter;
2275
2276 processor = current_processor();
2277 processor->active_thread = thread;
2278 processor->current_pri = thread->sched_pri;
2279 processor->current_thmode = thread->sched_mode;
2280 if (thread->last_processor != processor && thread->last_processor != NULL) {
2281 if (thread->last_processor->processor_set != processor->processor_set)
2282 thread->ps_switch++;
2283 thread->p_switch++;
2284 }
2285 thread->last_processor = processor;
2286 thread->c_switch++;
2287 ast_context(thread);
2288 thread_unlock(thread);
2289
2290 self->reason = reason;
2291
2292 processor->last_dispatch = ctime;
2293 self->last_run_time = ctime;
2294 thread_timer_event(ctime, &thread->system_timer);
2295 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
2296
2297 /*
2298 * Since non-precise user/kernel time doesn't update the state timer
2299 * during privilege transitions, synthesize an event now.
2300 */
2301 if (!thread->precise_user_kernel_time) {
2302 timer_switch(PROCESSOR_DATA(processor, current_state),
2303 ctime,
2304 PROCESSOR_DATA(processor, current_state));
2305 }
2306
2307 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2308 MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
2309 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
2310
2311 if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) {
2312 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
2313 (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
2314 }
2315
2316 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
2317
2318 SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri);
2319
2320 TLOG(1, "thread_invoke: calling stack_handoff\n");
2321 stack_handoff(self, thread);
2322
2323 DTRACE_SCHED(on__cpu);
2324
2325 thread_dispatch(self, thread);
2326
2327 thread->continuation = thread->parameter = NULL;
2328
2329 counter(c_thread_invoke_hits++);
2330
2331 funnel_refunnel_check(thread, 2);
2332 (void) spllo();
2333
2334 assert(continuation);
2335 call_continuation(continuation, parameter, thread->wait_result);
2336 /*NOTREACHED*/
2337 }
2338 else if (thread == self) {
2339 /* same thread but with continuation */
2340 ast_context(self);
2341 counter(++c_thread_invoke_same);
2342 thread_unlock(self);
2343
2344 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2345 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2346 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
2347
2348 self->continuation = self->parameter = NULL;
2349
2350 funnel_refunnel_check(self, 3);
2351 (void) spllo();
2352
2353 call_continuation(continuation, parameter, self->wait_result);
2354 /*NOTREACHED*/
2355 }
2356 }
2357 else {
2358 /*
2359 * Check that the other thread has a stack
2360 */
2361 if (!thread->kernel_stack) {
2362 need_stack:
2363 if (!stack_alloc_try(thread)) {
2364 counter(c_thread_invoke_misses++);
2365 thread_unlock(thread);
2366 thread_stack_enqueue(thread);
2367 return (FALSE);
2368 }
2369 }
2370 else if (thread == self) {
2371 ast_context(self);
2372 counter(++c_thread_invoke_same);
2373 thread_unlock(self);
2374
2375 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2376 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2377 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
2378
2379 return (TRUE);
2380 }
2381 }
2382
2383 /*
2384 * Context switch by full context save.
2385 */
2386 processor = current_processor();
2387 processor->active_thread = thread;
2388 processor->current_pri = thread->sched_pri;
2389 processor->current_thmode = thread->sched_mode;
2390 if (thread->last_processor != processor && thread->last_processor != NULL) {
2391 if (thread->last_processor->processor_set != processor->processor_set)
2392 thread->ps_switch++;
2393 thread->p_switch++;
2394 }
2395 thread->last_processor = processor;
2396 thread->c_switch++;
2397 ast_context(thread);
2398 thread_unlock(thread);
2399
2400 counter(c_thread_invoke_csw++);
2401
2402 assert(self->runq == PROCESSOR_NULL);
2403 self->reason = reason;
2404
2405 processor->last_dispatch = ctime;
2406 self->last_run_time = ctime;
2407 thread_timer_event(ctime, &thread->system_timer);
2408 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
2409
2410 /*
2411 * Since non-precise user/kernel time doesn't update the state timer
2412 * during privilege transitions, synthesize an event now.
2413 */
2414 if (!thread->precise_user_kernel_time) {
2415 timer_switch(PROCESSOR_DATA(processor, current_state),
2416 ctime,
2417 PROCESSOR_DATA(processor, current_state));
2418 }
2419
2420
2421 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2422 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2423 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
2424
2425 if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) {
2426 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
2427 (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
2428 }
2429
2430 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
2431
2432 SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri);
2433
2434 /*
2435 * This is where we actually switch register context,
2436 * and address space if required. We will next run
2437 * as a result of a subsequent context switch.
2438 */
2439 assert(continuation == self->continuation);
2440 thread = machine_switch_context(self, continuation, thread);
2441 assert(self == current_thread());
2442 TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread);
2443
2444 DTRACE_SCHED(on__cpu);
2445
2446 /*
2447 * We have been resumed and are set to run.
2448 */
2449 thread_dispatch(thread, self);
2450
2451 if (continuation) {
2452 self->continuation = self->parameter = NULL;
2453
2454 funnel_refunnel_check(self, 3);
2455 (void) spllo();
2456
2457 call_continuation(continuation, parameter, self->wait_result);
2458 /*NOTREACHED*/
2459 }
2460
2461 return (TRUE);
2462 }
2463
2464 /*
2465 * thread_dispatch:
2466 *
2467 * Handle threads at context switch. Re-dispatch other thread
2468 * if still running, otherwise update run state and perform
2469 * special actions. Update quantum for other thread and begin
2470 * the quantum for ourselves.
2471 *
2472 * "self" is our new current thread that we have context switched
2473 * to, "thread" is the old thread that we have switched away from.
2474 *
2475 * Called at splsched.
2476 */
2477 void
2478 thread_dispatch(
2479 thread_t thread,
2480 thread_t self)
2481 {
2482 processor_t processor = self->last_processor;
2483
2484 if (thread != THREAD_NULL) {
2485 /*
2486 * If blocked at a continuation, discard
2487 * the stack.
2488 */
2489 if (thread->continuation != NULL && thread->kernel_stack != 0)
2490 stack_free(thread);
2491
2492 if (!(thread->state & TH_IDLE)) {
2493 int64_t consumed;
2494 int64_t remainder = 0;
2495
2496 if (processor->quantum_end > processor->last_dispatch)
2497 remainder = processor->quantum_end -
2498 processor->last_dispatch;
2499
2500 consumed = thread->current_quantum - remainder;
2501
2502 if ((thread->reason & AST_LEDGER) == 0) {
2503 /*
2504 * Bill CPU time to both the task and
2505 * the individual thread.
2506 */
2507 ledger_credit(thread->t_ledger,
2508 task_ledgers.cpu_time, consumed);
2509 ledger_credit(thread->t_threadledger,
2510 thread_ledgers.cpu_time, consumed);
2511 }
2512
2513 wake_lock(thread);
2514 thread_lock(thread);
2515
2516 /*
2517 * Compute remainder of current quantum.
2518 */
2519 if (first_timeslice(processor) &&
2520 processor->quantum_end > processor->last_dispatch)
2521 thread->current_quantum = (uint32_t)remainder;
2522 else
2523 thread->current_quantum = 0;
2524
2525 if (thread->sched_mode == TH_MODE_REALTIME) {
2526 /*
2527 * Cancel the deadline if the thread has
2528 * consumed the entire quantum.
2529 */
2530 if (thread->current_quantum == 0) {
2531 thread->realtime.deadline = UINT64_MAX;
2532 thread->reason |= AST_QUANTUM;
2533 }
2534 } else {
2535 #if defined(CONFIG_SCHED_TRADITIONAL)
2536 /*
2537 * For non-realtime threads, treat a tiny
2538 * remaining quantum as an expired quantum,
2539 * but carry what's left over into the next one.
2540 */
2541 if (thread->current_quantum < min_std_quantum) {
2542 thread->reason |= AST_QUANTUM;
2543 thread->current_quantum += SCHED(initial_quantum_size)(thread);
2544 }
2545 #endif
2546 }
2547
2548 /*
2549 * If we are doing a direct handoff then
2550 * take the remainder of the quantum.
2551 */
2552 if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
2553 self->current_quantum = thread->current_quantum;
2554 thread->reason |= AST_QUANTUM;
2555 thread->current_quantum = 0;
2556 }
2557
2558 thread->computation_metered += (processor->last_dispatch - thread->computation_epoch);
2559
2560 if ((thread->rwlock_count != 0) && !(LcksOpts & disLkRWPrio)) {
2561 integer_t priority;
2562
2563 priority = thread->sched_pri;
2564
2565 if (priority < thread->priority)
2566 priority = thread->priority;
2567 if (priority < BASEPRI_BACKGROUND)
2568 priority = BASEPRI_BACKGROUND;
2569
2570 if ((thread->sched_pri < priority) || !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2571 KERNEL_DEBUG_CONSTANT(
2572 MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
2573 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->priority, priority, 0);
2574
2575 thread->sched_flags |= TH_SFLAG_RW_PROMOTED;
2576
2577 if (thread->sched_pri < priority)
2578 set_sched_pri(thread, priority);
2579 }
2580 }
2581
2582 if (!(thread->state & TH_WAIT)) {
2583 /*
2584 * Still running.
2585 */
2586 if (thread->reason & AST_QUANTUM)
2587 thread_setrun(thread, SCHED_TAILQ);
2588 else
2589 if (thread->reason & AST_PREEMPT)
2590 thread_setrun(thread, SCHED_HEADQ);
2591 else
2592 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2593
2594 thread->reason = AST_NONE;
2595
2596 if (thread->wake_active) {
2597 thread->wake_active = FALSE;
2598 thread_unlock(thread);
2599
2600 thread_wakeup(&thread->wake_active);
2601 }
2602 else
2603 thread_unlock(thread);
2604
2605 wake_unlock(thread);
2606 }
2607 else {
2608 /*
2609 * Waiting.
2610 */
2611 boolean_t should_terminate = FALSE;
2612
2613 /* Only the first call to thread_dispatch
2614 * after explicit termination should add
2615 * the thread to the termination queue
2616 */
2617 if ((thread->state & (TH_TERMINATE|TH_TERMINATE2)) == TH_TERMINATE) {
2618 should_terminate = TRUE;
2619 thread->state |= TH_TERMINATE2;
2620 }
2621
2622 thread->state &= ~TH_RUN;
2623 thread->chosen_processor = PROCESSOR_NULL;
2624
2625 if (thread->sched_mode == TH_MODE_TIMESHARE) {
2626 if (thread->max_priority <= MAXPRI_THROTTLE)
2627 sched_background_decr();
2628
2629 sched_share_decr();
2630 }
2631 sched_run_decr();
2632
2633 (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
2634
2635 if (thread->wake_active) {
2636 thread->wake_active = FALSE;
2637 thread_unlock(thread);
2638
2639 thread_wakeup(&thread->wake_active);
2640 }
2641 else
2642 thread_unlock(thread);
2643
2644 wake_unlock(thread);
2645
2646 if (should_terminate)
2647 thread_terminate_enqueue(thread);
2648 }
2649 }
2650 }
2651
2652 if (!(self->state & TH_IDLE)) {
2653 uint64_t arg1, arg2;
2654 int urgency;
2655
2656 urgency = thread_get_urgency(self, &arg1, &arg2);
2657
2658 thread_tell_urgency(urgency, arg1, arg2, self);
2659
2660 /*
2661 * Get a new quantum if none remaining.
2662 */
2663 if (self->current_quantum == 0) {
2664 thread_quantum_init(self);
2665 self->last_quantum_refill_time = processor->last_dispatch;
2666 }
2667
2668 /*
2669 * Set up quantum timer and timeslice.
2670 */
2671 processor->quantum_end = (processor->last_dispatch + self->current_quantum);
2672 timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL);
2673
2674 processor->timeslice = 1;
2675
2676 self->computation_epoch = processor->last_dispatch;
2677 }
2678 else {
2679 timer_call_cancel(&processor->quantum_timer);
2680 processor->timeslice = 0;
2681
2682 thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL);
2683 }
2684 }
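/*
 * Worked example (illustrative): suppose a thread was dispatched with a
 * quantum worth 10ms and blocks with 4ms left before quantum_end.  Then
 * remainder == 4ms, consumed == 6ms is credited to the task and thread
 * cpu_time ledgers (unless AST_LEDGER is set), and the thread keeps
 * current_quantum == 4ms for its next dispatch while the processor is
 * still on its first timeslice.  If the remainder were below
 * min_std_quantum, a non-realtime thread would instead have its quantum
 * marked expired (AST_QUANTUM) and topped up with a fresh quantum for
 * next time.
 */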
2685
2686 #include <libkern/OSDebug.h>
2687
2688 uint32_t kdebug_thread_block = 0;
2689
2690
2691 /*
2692 * thread_block_reason:
2693 *
2694 * Forces a reschedule, blocking the caller if a wait
2695 * has been asserted.
2696 *
2697 * If a continuation is specified, then thread_invoke will
2698 * attempt to discard the thread's kernel stack. When the
2699 * thread resumes, it will execute the continuation function
2700 * on a new kernel stack.
2701 */
2702 counter(mach_counter_t c_thread_block_calls = 0;)
2703
2704 wait_result_t
2705 thread_block_reason(
2706 thread_continue_t continuation,
2707 void *parameter,
2708 ast_t reason)
2709 {
2710 register thread_t self = current_thread();
2711 register processor_t processor;
2712 register thread_t new_thread;
2713 spl_t s;
2714
2715 counter(++c_thread_block_calls);
2716
2717 s = splsched();
2718
2719 if (!(reason & AST_PREEMPT))
2720 funnel_release_check(self, 2);
2721
2722 processor = current_processor();
2723
2724 /* If we're explicitly yielding, force a subsequent quantum */
2725 if (reason & AST_YIELD)
2726 processor->timeslice = 0;
2727
2728 /* We're handling all scheduling ASTs */
2729 ast_off(AST_SCHEDULING);
2730
2731 self->continuation = continuation;
2732 self->parameter = parameter;
2733
2734 if (__improbable(kdebug_thread_block && kdebug_enable && self->state != TH_RUN)) {
2735 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2736 MACHDBG_CODE(DBG_MACH_SCHED,MACH_BLOCK),
2737 reason, VM_KERNEL_UNSLIDE(continuation), 0, 0, 0);
2738 }
2739
2740 do {
2741 thread_lock(self);
2742 new_thread = thread_select(self, processor);
2743 thread_unlock(self);
2744 } while (!thread_invoke(self, new_thread, reason));
2745
2746 funnel_refunnel_check(self, 5);
2747 splx(s);
2748
2749 return (self->wait_result);
2750 }
2751
2752 /*
2753 * thread_block:
2754 *
2755 * Block the current thread if a wait has been asserted.
2756 */
2757 wait_result_t
2758 thread_block(
2759 thread_continue_t continuation)
2760 {
2761 return thread_block_reason(continuation, NULL, AST_NONE);
2762 }
2763
2764 wait_result_t
2765 thread_block_parameter(
2766 thread_continue_t continuation,
2767 void *parameter)
2768 {
2769 return thread_block_reason(continuation, parameter, AST_NONE);
2770 }
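/*
 * Illustrative sketch of the usual caller-side wait pattern built on
 * thread_block(); not compiled, and the flag example_done is a
 * hypothetical wait condition used only for illustration.  The wait is
 * asserted before the condition is tested so a wakeup cannot be lost.
 */
#if 0
static boolean_t example_done;		/* hypothetical condition set by the waker */

static wait_result_t
example_wait(void)
{
	wait_result_t wres = THREAD_AWAKENED;

	assert_wait((event_t)&example_done, THREAD_UNINT);

	if (!example_done)
		wres = thread_block(THREAD_CONTINUE_NULL);	/* sleeps until thread_wakeup(&example_done) */
	else
		clear_wait(current_thread(), THREAD_AWAKENED);	/* condition already true, cancel the wait */

	return (wres);
}
#endif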
2771
2772 /*
2773 * thread_run:
2774 *
2775 * Switch directly from the current thread to the
2776 * new thread, handing off our quantum if appropriate.
2777 *
2778 * New thread must be runnable, and not on a run queue.
2779 *
2780 * Called at splsched.
2781 */
2782 int
2783 thread_run(
2784 thread_t self,
2785 thread_continue_t continuation,
2786 void *parameter,
2787 thread_t new_thread)
2788 {
2789 ast_t handoff = AST_HANDOFF;
2790
2791 funnel_release_check(self, 3);
2792
2793 self->continuation = continuation;
2794 self->parameter = parameter;
2795
2796 while (!thread_invoke(self, new_thread, handoff)) {
2797 processor_t processor = current_processor();
2798
2799 thread_lock(self);
2800 new_thread = thread_select(self, processor);
2801 thread_unlock(self);
2802 handoff = AST_NONE;
2803 }
2804
2805 funnel_refunnel_check(self, 6);
2806
2807 return (self->wait_result);
2808 }
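/*
 * Example (illustrative): a waker that has just made exactly one thread
 * runnable, and is itself about to block, can hand off directly with
 * thread_run(self, continuation, NULL, woken_thread).  The AST_HANDOFF
 * reason lets the woken thread inherit the remainder of the waker's
 * quantum in thread_dispatch() above.
 */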
2809
2810 /*
2811 * thread_continue:
2812 *
2813 * Called at splsched when a thread first receives
2814 * a new stack after a continuation.
2815 */
2816 void
2817 thread_continue(
2818 register thread_t thread)
2819 {
2820 register thread_t self = current_thread();
2821 register thread_continue_t continuation;
2822 register void *parameter;
2823
2824 DTRACE_SCHED(on__cpu);
2825
2826 continuation = self->continuation;
2827 parameter = self->parameter;
2828
2829 thread_dispatch(thread, self);
2830
2831 self->continuation = self->parameter = NULL;
2832
2833 funnel_refunnel_check(self, 4);
2834
2835 if (thread != THREAD_NULL)
2836 (void)spllo();
2837
2838 TLOG(1, "thread_continue: calling call_continuation \n");
2839 call_continuation(continuation, parameter, self->wait_result);
2840 /*NOTREACHED*/
2841 }
2842
2843 void
2844 thread_quantum_init(thread_t thread)
2845 {
2846 if (thread->sched_mode == TH_MODE_REALTIME) {
2847 thread->current_quantum = thread->realtime.computation;
2848 } else {
2849 thread->current_quantum = SCHED(initial_quantum_size)(thread);
2850 }
2851 }
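/*
 * Example (illustrative): a realtime thread receives exactly its declared
 * computation time (thread->realtime.computation) as its quantum, while
 * under the traditional scheduler below a timeshare thread receives
 * std_quantum unless its priority is at or below MAXPRI_THROTTLE, in
 * which case it receives the background quantum bg_quantum.
 */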
2852
2853 #if defined(CONFIG_SCHED_TRADITIONAL)
2854 static uint32_t
2855 sched_traditional_initial_quantum_size(thread_t thread)
2856 {
2857 if ((thread == THREAD_NULL) || thread->priority > MAXPRI_THROTTLE)
2858 return std_quantum;
2859 else
2860 return bg_quantum;
2861 }
2862
2863 static sched_mode_t
2864 sched_traditional_initial_thread_sched_mode(task_t parent_task)
2865 {
2866 if (parent_task == kernel_task)
2867 return TH_MODE_FIXED;
2868 else
2869 return TH_MODE_TIMESHARE;
2870 }
2871
2872 static boolean_t
2873 sched_traditional_supports_timeshare_mode(void)
2874 {
2875 return TRUE;
2876 }
2877
2878 #endif /* CONFIG_SCHED_TRADITIONAL */
2879
2880 /*
2881 * run_queue_init:
2882 *
2883 * Initialize a run queue before first use.
2884 */
2885 void
2886 run_queue_init(
2887 run_queue_t rq)
2888 {
2889 int i;
2890
2891 rq->highq = IDLEPRI;
2892 for (i = 0; i < NRQBM; i++)
2893 rq->bitmap[i] = 0;
2894 setbit(MAXPRI - IDLEPRI, rq->bitmap);
2895 rq->urgency = rq->count = 0;
2896 for (i = 0; i < NRQS; i++)
2897 queue_init(&rq->queues[i]);
2898 }
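/*
 * Illustrative sketch of the bitmap convention shared by the run-queue
 * routines below; not compiled, and the helper name is hypothetical.
 * Bit (MAXPRI - pri) is set whenever the queue at priority pri is
 * non-empty, so the lowest-numbered set bit found by ffsbit() always
 * corresponds to the highest occupied priority.
 */
#if 0
static int
example_runq_highest_pri(run_queue_t rq)
{
	/*
	 * e.g. with only priorities 80 and IDLEPRI occupied, bits
	 * (MAXPRI - 80) and (MAXPRI - IDLEPRI) are set, ffsbit()
	 * returns (MAXPRI - 80), and the result is 80.
	 */
	return (MAXPRI - ffsbit(rq->bitmap));
}
#endif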
2899
2900 #if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
2901 int
2902 sched_traditional_fairshare_runq_count(void)
2903 {
2904 return fs_runq.count;
2905 }
2906
2907 uint64_t
2908 sched_traditional_fairshare_runq_stats_count_sum(void)
2909 {
2910 return fs_runq.runq_stats.count_sum;
2911 }
2912
2913 void
2914 sched_traditional_fairshare_enqueue(thread_t thread)
2915 {
2916 queue_t queue = &fs_runq.queue;
2917
2918 simple_lock(&fs_lock);
2919
2920 enqueue_tail(queue, (queue_entry_t)thread);
2921
2922 thread->runq = FS_RUNQ;
2923 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
2924 fs_runq.count++;
2925
2926 simple_unlock(&fs_lock);
2927 }
2928
2929 thread_t
2930 sched_traditional_fairshare_dequeue(void)
2931 {
2932 thread_t thread;
2933
2934 simple_lock(&fs_lock);
2935 if (fs_runq.count > 0) {
2936 thread = (thread_t)dequeue_head(&fs_runq.queue);
2937
2938 thread->runq = PROCESSOR_NULL;
2939 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
2940 fs_runq.count--;
2941
2942 simple_unlock(&fs_lock);
2943
2944 return (thread);
2945 }
2946 simple_unlock(&fs_lock);
2947
2948 return THREAD_NULL;
2949 }
2950
2951 boolean_t
2952 sched_traditional_fairshare_queue_remove(thread_t thread)
2953 {
2954 queue_t q;
2955
2956 simple_lock(&fs_lock);
2957 q = &fs_runq.queue;
2958
2959 if (FS_RUNQ == thread->runq) {
2960 remqueue((queue_entry_t)thread);
2961 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
2962 fs_runq.count--;
2963
2964 thread->runq = PROCESSOR_NULL;
2965 simple_unlock(&fs_lock);
2966 return (TRUE);
2967 }
2968 else {
2969 /*
2970 * The thread left the run queue before we could
2971 * lock the run queue.
2972 */
2973 assert(thread->runq == PROCESSOR_NULL);
2974 simple_unlock(&fs_lock);
2975 return (FALSE);
2976 }
2977 }
2978
2979 #endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) */
2980
2981 /*
2982 * run_queue_dequeue:
2983 *
2984 * Perform a dequeue operation on a run queue,
2985 * and return the resulting thread.
2986 *
2987 * The run queue must be locked (see thread_run_queue_remove()
2988 * for more info), and not empty.
2989 */
2990 thread_t
2991 run_queue_dequeue(
2992 run_queue_t rq,
2993 integer_t options)
2994 {
2995 thread_t thread;
2996 queue_t queue = rq->queues + rq->highq;
2997
2998 if (options & SCHED_HEADQ) {
2999 thread = (thread_t)dequeue_head(queue);
3000 }
3001 else {
3002 thread = (thread_t)dequeue_tail(queue);
3003 }
3004
3005 thread->runq = PROCESSOR_NULL;
3006 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3007 rq->count--;
3008 if (SCHED(priority_is_urgent)(rq->highq)) {
3009 rq->urgency--; assert(rq->urgency >= 0);
3010 }
3011 if (queue_empty(queue)) {
3012 if (rq->highq != IDLEPRI)
3013 clrbit(MAXPRI - rq->highq, rq->bitmap);
3014 rq->highq = MAXPRI - ffsbit(rq->bitmap);
3015 }
3016
3017 return (thread);
3018 }
3019
3020 /*
3021 * run_queue_enqueue:
3022 *
3023 * Perform an enqueue operation on a run queue.
3024 *
3025 * The run queue must be locked (see thread_run_queue_remove()
3026 * for more info).
3027 */
3028 boolean_t
3029 run_queue_enqueue(
3030 run_queue_t rq,
3031 thread_t thread,
3032 integer_t options)
3033 {
3034 queue_t queue = rq->queues + thread->sched_pri;
3035 boolean_t result = FALSE;
3036
3037 if (queue_empty(queue)) {
3038 enqueue_tail(queue, (queue_entry_t)thread);
3039
3040 setbit(MAXPRI - thread->sched_pri, rq->bitmap);
3041 if (thread->sched_pri > rq->highq) {
3042 rq->highq = thread->sched_pri;
3043 result = TRUE;
3044 }
3045 }
3046 else
3047 if (options & SCHED_TAILQ)
3048 enqueue_tail(queue, (queue_entry_t)thread);
3049 else
3050 enqueue_head(queue, (queue_entry_t)thread);
3051
3052 if (SCHED(priority_is_urgent)(thread->sched_pri))
3053 rq->urgency++;
3054 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3055 rq->count++;
3056
3057 return (result);
3058
3059 }
3060
3061 /*
3062 * run_queue_remove:
3063 *
3064 * Remove a specific thread from a runqueue.
3065 *
3066 * The run queue must be locked.
3067 */
3068 void
3069 run_queue_remove(
3070 run_queue_t rq,
3071 thread_t thread)
3072 {
3073
3074 remqueue((queue_entry_t)thread);
3075 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3076 rq->count--;
3077 if (SCHED(priority_is_urgent)(thread->sched_pri)) {
3078 rq->urgency--; assert(rq->urgency >= 0);
3079 }
3080
3081 if (queue_empty(rq->queues + thread->sched_pri)) {
3082 /* update run queue status */
3083 if (thread->sched_pri != IDLEPRI)
3084 clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
3085 rq->highq = MAXPRI - ffsbit(rq->bitmap);
3086 }
3087
3088 thread->runq = PROCESSOR_NULL;
3089 }
3090
3091 /*
3092 * fairshare_setrun:
3093 *
3094 * Dispatch a thread for round-robin execution.
3095 *
3096 * Thread must be locked. Associated pset must
3097 * be locked, and is returned unlocked.
3098 */
3099 static void
3100 fairshare_setrun(
3101 processor_t processor,
3102 thread_t thread)
3103 {
3104 processor_set_t pset = processor->processor_set;
3105
3106 thread->chosen_processor = processor;
3107
3108 SCHED(fairshare_enqueue)(thread);
3109
3110 if (processor != current_processor())
3111 machine_signal_idle(processor);
3112
3113 pset_unlock(pset);
3114
3115 }
3116
3117 /*
3118 * realtime_queue_insert:
3119 *
3120 * Enqueue a thread for realtime execution.
3121 */
3122 static boolean_t
3123 realtime_queue_insert(
3124 thread_t thread)
3125 {
3126 queue_t queue = &rt_runq.queue;
3127 uint64_t deadline = thread->realtime.deadline;
3128 boolean_t preempt = FALSE;
3129
3130 simple_lock(&rt_lock);
3131
3132 if (queue_empty(queue)) {
3133 enqueue_tail(queue, (queue_entry_t)thread);
3134 preempt = TRUE;
3135 }
3136 else {
3137 register thread_t entry = (thread_t)queue_first(queue);
3138
3139 while (TRUE) {
3140 if ( queue_end(queue, (queue_entry_t)entry) ||
3141 deadline < entry->realtime.deadline ) {
3142 entry = (thread_t)queue_prev((queue_entry_t)entry);
3143 break;
3144 }
3145
3146 entry = (thread_t)queue_next((queue_entry_t)entry);
3147 }
3148
3149 if ((queue_entry_t)entry == queue)
3150 preempt = TRUE;
3151
3152 insque((queue_entry_t)thread, (queue_entry_t)entry);
3153 }
3154
3155 thread->runq = RT_RUNQ;
3156 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
3157 rt_runq.count++;
3158
3159 simple_unlock(&rt_lock);
3160
3161 return (preempt);
3162 }
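/*
 * Example (illustrative): with realtime threads queued in deadline order
 * (say deadlines 10, 20, 30), a new thread with deadline 15 is inserted
 * between 10 and 20 and preempt stays FALSE, while a thread with
 * deadline 5 is inserted at the head of the queue and preempt is
 * reported as TRUE.
 */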
3163
3164 /*
3165 * realtime_setrun:
3166 *
3167 * Dispatch a thread for realtime execution.
3168 *
3169 * Thread must be locked. Associated pset must
3170 * be locked, and is returned unlocked.
3171 */
3172 static void
3173 realtime_setrun(
3174 processor_t processor,
3175 thread_t thread)
3176 {
3177 processor_set_t pset = processor->processor_set;
3178 ast_t preempt;
3179
3180 thread->chosen_processor = processor;
3181
3182 /*
3183 * Dispatch directly onto idle processor.
3184 */
3185 if ( (thread->bound_processor == processor)
3186 && processor->state == PROCESSOR_IDLE) {
3187 remqueue((queue_entry_t)processor);
3188 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3189
3190 processor->next_thread = thread;
3191 processor->current_pri = thread->sched_pri;
3192 processor->current_thmode = thread->sched_mode;
3193 processor->deadline = thread->realtime.deadline;
3194 processor->state = PROCESSOR_DISPATCHING;
3195
3196 if (processor != current_processor()) {
3197 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3198 /* cleared on exit from main processor_idle() loop */
3199 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3200 machine_signal_idle(processor);
3201 }
3202 }
3203
3204 pset_unlock(pset);
3205 return;
3206 }
3207
3208 if (processor->current_pri < BASEPRI_RTQUEUES)
3209 preempt = (AST_PREEMPT | AST_URGENT);
3210 else if (thread->realtime.deadline < processor->deadline)
3211 preempt = (AST_PREEMPT | AST_URGENT);
3212 else
3213 preempt = AST_NONE;
3214
3215 realtime_queue_insert(thread);
3216
3217 if (preempt != AST_NONE) {
3218 if (processor->state == PROCESSOR_IDLE) {
3219 remqueue((queue_entry_t)processor);
3220 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3221 processor->next_thread = THREAD_NULL;
3222 processor->current_pri = thread->sched_pri;
3223 processor->current_thmode = thread->sched_mode;
3224 processor->deadline = thread->realtime.deadline;
3225 processor->state = PROCESSOR_DISPATCHING;
3226 if (processor == current_processor()) {
3227 ast_on(preempt);
3228 } else {
3229 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3230 /* cleared on exit from main processor_idle() loop */
3231 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3232 machine_signal_idle(processor);
3233 }
3234 }
3235 } else if (processor->state == PROCESSOR_DISPATCHING) {
3236 if ((processor->next_thread == THREAD_NULL) && ((processor->current_pri < thread->sched_pri) || (processor->deadline > thread->realtime.deadline))) {
3237 processor->current_pri = thread->sched_pri;
3238 processor->current_thmode = thread->sched_mode;
3239 processor->deadline = thread->realtime.deadline;
3240 }
3241 } else {
3242 if (processor == current_processor()) {
3243 ast_on(preempt);
3244 } else {
3245 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3246 /* cleared after IPI causes csw_check() to be called */
3247 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3248 cause_ast_check(processor);
3249 }
3250 }
3251 }
3252 } else {
3253 /* Selected processor was too busy, just keep thread enqueued and let other processors drain it naturally. */
3254 }
3255
3256 pset_unlock(pset);
3257 }
3258
3259 #if defined(CONFIG_SCHED_TRADITIONAL)
3260
3261 static boolean_t
3262 priority_is_urgent(int priority)
3263 {
3264 return testbit(priority, sched_preempt_pri) ? TRUE : FALSE;
3265 }
3266
3267 /*
3268 * processor_enqueue:
3269 *
3270 * Enqueue thread on a processor run queue. Thread must be locked,
3271 * and not already be on a run queue.
3272 *
3273 * Returns TRUE if a preemption is indicated based on the state
3274 * of the run queue.
3275 *
3276 * The run queue must be locked (see thread_run_queue_remove()
3277 * for more info).
3278 */
3279 static boolean_t
3280 processor_enqueue(
3281 processor_t processor,
3282 thread_t thread,
3283 integer_t options)
3284 {
3285 run_queue_t rq = runq_for_processor(processor);
3286 boolean_t result;
3287
3288 result = run_queue_enqueue(rq, thread, options);
3289 thread->runq = processor;
3290 runq_consider_incr_bound_count(processor, thread);
3291
3292 return (result);
3293 }
3294
3295 #endif /* CONFIG_SCHED_TRADITIONAL */
3296
3297 /*
3298 * processor_setrun:
3299 *
3300 * Dispatch a thread for execution on a
3301 * processor.
3302 *
3303 * Thread must be locked. Associated pset must
3304 * be locked, and is returned unlocked.
3305 */
3306 static void
3307 processor_setrun(
3308 processor_t processor,
3309 thread_t thread,
3310 integer_t options)
3311 {
3312 processor_set_t pset = processor->processor_set;
3313 ast_t preempt;
3314 enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing;
3315
3316 thread->chosen_processor = processor;
3317
3318 /*
3319 * Dispatch directly onto idle processor.
3320 */
3321 if ( (SCHED(direct_dispatch_to_idle_processors) ||
3322 thread->bound_processor == processor)
3323 && processor->state == PROCESSOR_IDLE) {
3324 remqueue((queue_entry_t)processor);
3325 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3326
3327 processor->next_thread = thread;
3328 processor->current_pri = thread->sched_pri;
3329 processor->current_thmode = thread->sched_mode;
3330 processor->deadline = UINT64_MAX;
3331 processor->state = PROCESSOR_DISPATCHING;
3332
3333 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3334 /* cleared on exit from main processor_idle() loop */
3335 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3336 machine_signal_idle(processor);
3337 }
3338
3339 pset_unlock(pset);
3340 return;
3341 }
3342
3343 /*
3344 * Set preemption mode.
3345 */
3346 if (SCHED(priority_is_urgent)(thread->sched_pri) && thread->sched_pri > processor->current_pri)
3347 preempt = (AST_PREEMPT | AST_URGENT);
3348 else if(processor->active_thread && thread_eager_preemption(processor->active_thread))
3349 preempt = (AST_PREEMPT | AST_URGENT);
3350 else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->priority)) {
3351 if(SCHED(priority_is_urgent)(thread->priority) && thread->sched_pri > processor->current_pri) {
3352 preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
3353 } else {
3354 preempt = AST_NONE;
3355 }
3356 } else
3357 preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
3358
3359 SCHED(processor_enqueue)(processor, thread, options);
3360
3361 if (preempt != AST_NONE) {
3362 if (processor->state == PROCESSOR_IDLE) {
3363 remqueue((queue_entry_t)processor);
3364 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3365 processor->next_thread = THREAD_NULL;
3366 processor->current_pri = thread->sched_pri;
3367 processor->current_thmode = thread->sched_mode;
3368 processor->deadline = UINT64_MAX;
3369 processor->state = PROCESSOR_DISPATCHING;
3370
3371 ipi_action = eExitIdle;
3372 } else if ( processor->state == PROCESSOR_DISPATCHING) {
3373 if ((processor->next_thread == THREAD_NULL) && (processor->current_pri < thread->sched_pri)) {
3374 processor->current_pri = thread->sched_pri;
3375 processor->current_thmode = thread->sched_mode;
3376 processor->deadline = UINT64_MAX;
3377 }
3378 } else if ( (processor->state == PROCESSOR_RUNNING ||
3379 processor->state == PROCESSOR_SHUTDOWN) &&
3380 (thread->sched_pri >= processor->current_pri ||
3381 processor->current_thmode == TH_MODE_FAIRSHARE)) {
3382 ipi_action = eInterruptRunning;
3383 }
3384 } else {
3385 /*
3386 * The new thread is not important enough to preempt what is running, but
3387 * special processor states may need special handling.
3388 */
3389 if (processor->state == PROCESSOR_SHUTDOWN &&
3390 thread->sched_pri >= processor->current_pri ) {
3391 ipi_action = eInterruptRunning;
3392 } else if ( processor->state == PROCESSOR_IDLE &&
3393 processor != current_processor() ) {
3394 remqueue((queue_entry_t)processor);
3395 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3396 processor->next_thread = THREAD_NULL;
3397 processor->current_pri = thread->sched_pri;
3398 processor->current_thmode = thread->sched_mode;
3399 processor->deadline = UINT64_MAX;
3400 processor->state = PROCESSOR_DISPATCHING;
3401
3402 ipi_action = eExitIdle;
3403 }
3404 }
3405
3406 switch (ipi_action) {
3407 case eDoNothing:
3408 break;
3409 case eExitIdle:
3410 if (processor == current_processor()) {
3411 if (csw_check_locked(processor, pset) != AST_NONE)
3412 ast_on(preempt);
3413 } else {
3414 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3415 /* cleared on exit from main processor_idle() loop */
3416 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3417 machine_signal_idle(processor);
3418 }
3419 }
3420 break;
3421 case eInterruptRunning:
3422 if (processor == current_processor()) {
3423 if (csw_check_locked(processor, pset) != AST_NONE)
3424 ast_on(preempt);
3425 } else {
3426 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3427 /* cleared after IPI causes csw_check() to be called */
3428 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3429 cause_ast_check(processor);
3430 }
3431 }
3432 break;
3433 }
3434
3435 pset_unlock(pset);
3436 }
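/*
 * Illustrative sketch of the pending-AST bookkeeping used above; not
 * compiled, and the helper name is hypothetical.  Each processor owns
 * one bit of pset->pending_AST_cpu_mask, so the idle wakeup or cross-CPU
 * AST is signalled at most once until the target clears its bit.
 */
#if 0
static boolean_t
example_mark_ast_pending(processor_set_t pset, processor_t processor)
{
	uint32_t bit = (1U << processor->cpu_id);

	/* Caller is expected to hold the pset lock. */
	if (pset->pending_AST_cpu_mask & bit)
		return (FALSE);			/* already signalled, nothing to do */

	pset->pending_AST_cpu_mask |= bit;
	return (TRUE);				/* caller should signal the processor */
}
#endif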
3437
3438 #if defined(CONFIG_SCHED_TRADITIONAL)
3439
3440 static boolean_t
3441 processor_queue_empty(processor_t processor)
3442 {
3443 return runq_for_processor(processor)->count == 0;
3444
3445 }
3446
3447 static boolean_t
3448 sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor)
3449 {
3450 processor_set_t pset = processor->processor_set;
3451 int count = runq_for_processor(processor)->count;
3452
3453 /*
3454 * The pset runq contains the count of all runnable threads
3455 * for all processors in the pset. However, for threads that
3456 * are bound to another processor, the current "processor"
3457 * is not eligible to execute the thread. So we only
3458 * include bound threads that are bound to the current
3459 * "processor". This allows the processor to idle when the
3460 * count of eligible threads drops to 0, even if there's
3461 * a runnable thread bound to a different processor in the
3462 * shared runq.
3463 */
3464
3465 count -= pset->pset_runq_bound_count;
3466 count += processor->runq_bound_count;
3467
3468 return count == 0;
3469 }
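/*
 * Worked example (illustrative): if the shared pset runq holds 3
 * runnable threads, 2 of which are bound to other processors
 * (pset_runq_bound_count == 2) and none bound here
 * (runq_bound_count == 0), the eligible count is 3 - 2 + 0 == 1,
 * so this processor does not report its queue as empty.
 */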
3470
3471 static ast_t
3472 processor_csw_check(processor_t processor)
3473 {
3474 run_queue_t runq;
3475 boolean_t has_higher;
3476
3477 assert(processor->active_thread != NULL);
3478
3479 runq = runq_for_processor(processor);
3480 if (first_timeslice(processor)) {
3481 has_higher = (runq->highq > processor->current_pri);
3482 } else {
3483 has_higher = (runq->highq >= processor->current_pri);
3484 }
3485 if (has_higher) {
3486 if (runq->urgency > 0)
3487 return (AST_PREEMPT | AST_URGENT);
3488
3489 if (processor->active_thread && thread_eager_preemption(processor->active_thread))
3490 return (AST_PREEMPT | AST_URGENT);
3491
3492 return AST_PREEMPT;
3493 }
3494
3495 return AST_NONE;
3496 }
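/*
 * Example (illustrative): while the running thread is still within its
 * first timeslice, only a strictly higher-priority thread on the run
 * queue triggers preemption (highq > current_pri); once the first
 * timeslice is used up, an equal-priority thread suffices
 * (highq >= current_pri), which yields round-robin behavior within a
 * priority level at quantum boundaries.
 */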
3497
3498 static boolean_t
3499 processor_queue_has_priority(processor_t processor,
3500 int priority,
3501 boolean_t gte)
3502 {
3503 if (gte)
3504 return runq_for_processor(processor)->highq >= priority;
3505 else
3506 return runq_for_processor(processor)->highq > priority;
3507 }
3508
3509 static boolean_t
3510 should_current_thread_rechoose_processor(processor_t processor)
3511 {
3512 return (processor->current_pri < BASEPRI_RTQUEUES
3513 && processor->processor_meta != PROCESSOR_META_NULL
3514 && processor->processor_meta->primary != processor);
3515 }
3516
3517 static int
3518 sched_traditional_processor_runq_count(processor_t processor)
3519 {
3520 return runq_for_processor(processor)->count;
3521 }
3522
3523
3524 static uint64_t
3525 sched_traditional_processor_runq_stats_count_sum(processor_t processor)
3526 {
3527 return runq_for_processor(processor)->runq_stats.count_sum;
3528 }
3529
3530 static uint64_t
3531 sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor)
3532 {
3533 if (processor->cpu_id == processor->processor_set->cpu_set_low)
3534 return runq_for_processor(processor)->runq_stats.count_sum;
3535 else
3536 return 0ULL;
3537 }
3538
3539 #endif /* CONFIG_SCHED_TRADITIONAL */
3540
3541 #define next_pset(p) (((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets)
3542
3543 /*
3544 * choose_next_pset:
3545 *
3546 * Return the next sibling pset containing
3547 * available processors.
3548 *
3549 * Returns the original pset if none other is
3550 * suitable.
3551 */
3552 static processor_set_t
3553 choose_next_pset(
3554 processor_set_t pset)
3555 {
3556 processor_set_t nset = pset;
3557
3558 do {
3559 nset = next_pset(nset);
3560 } while (nset->online_processor_count < 1 && nset != pset);
3561
3562 return (nset);
3563 }
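/*
 * Example (illustrative): with processor sets linked A -> B -> C on a
 * node, starting from A the walk visits B and then C, returning the
 * first set that has at least one online processor; if none qualifies,
 * the loop wraps around and A itself is returned.
 */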
3564
3565 /*
3566 * choose_processor:
3567 *
3568 * Choose a processor for the thread, beginning at
3569 * the pset. Accepts an optional processor hint in
3570 * the pset.
3571 *
3572 * Returns a processor, possibly from a different pset.
3573 *
3574 * The thread must be locked. The pset must be locked,
3575 * and the resulting pset is locked on return.
3576 */
3577 processor_t
3578 choose_processor(
3579 processor_set_t pset,
3580 processor_t processor,
3581 thread_t thread)
3582 {
3583 processor_set_t nset, cset = pset;
3584 processor_meta_t pmeta = PROCESSOR_META_NULL;
3585 processor_t mprocessor;
3586
3587 /*
3588 * Prefer the hinted processor, when appropriate.
3589 */
3590
3591 if (processor != PROCESSOR_NULL) {
3592 if (processor->processor_meta != PROCESSOR_META_NULL)
3593 processor = processor->processor_meta->primary;
3594 }
3595
3596 mprocessor = machine_choose_processor(pset, processor);
3597 if (mprocessor != PROCESSOR_NULL)
3598 processor = mprocessor;
3599
3600 if (processor != PROCESSOR_NULL) {
3601 if (processor->processor_set != pset ||
3602 processor->state == PROCESSOR_INACTIVE ||
3603 processor->state == PROCESSOR_SHUTDOWN ||
3604 processor->state == PROCESSOR_OFF_LINE)
3605 processor = PROCESSOR_NULL;
3606 else
3607 if (processor->state == PROCESSOR_IDLE ||
3608 ((thread->sched_pri >= BASEPRI_RTQUEUES) &&
3609 (processor->current_pri < BASEPRI_RTQUEUES)))
3610 return (processor);
3611 }
3612
3613 /*
3614 * Iterate through the processor sets to locate
3615 * an appropriate processor.
3616 */
3617 do {
3618 /*
3619 * Choose an idle processor.
3620 */
3621 if (!queue_empty(&cset->idle_queue))
3622 return ((processor_t)queue_first(&cset->idle_queue));
3623
3624 if (thread->sched_pri >= BASEPRI_RTQUEUES) {
3625 integer_t lowest_priority = MAXPRI + 1;
3626 integer_t lowest_unpaired = MAXPRI + 1;
3627 uint64_t furthest_deadline = 1;
3628 processor_t lp_processor = PROCESSOR_NULL;
3629 processor_t lp_unpaired = PROCESSOR_NULL;
3630 processor_t fd_processor = PROCESSOR_NULL;
3631
3632 lp_processor = cset->low_pri;
3633 /* Consider hinted processor */
3634 if (lp_processor != PROCESSOR_NULL &&
3635 ((lp_processor->processor_meta == PROCESSOR_META_NULL) ||
3636 ((lp_processor == lp_processor->processor_meta->primary) &&
3637 !queue_empty(&lp_processor->processor_meta->idle_queue))) &&
3638 lp_processor->state != PROCESSOR_INACTIVE &&
3639 lp_processor->state != PROCESSOR_SHUTDOWN &&
3640 lp_processor->state != PROCESSOR_OFF_LINE &&
3641 (lp_processor->current_pri < thread->sched_pri))
3642 return lp_processor;
3643
3644 processor = (processor_t)queue_first(&cset->active_queue);
3645 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
3646 /* Discover the processor executing the
3647 * thread with the lowest priority within
3648 * this pset, or the one with the furthest
3649 * deadline
3650 */
3651 integer_t cpri = processor->current_pri;
3652 if (cpri < lowest_priority) {
3653 lowest_priority = cpri;
3654 lp_processor = processor;
3655 }
3656
3657 if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) {
3658 furthest_deadline = processor->deadline;
3659 fd_processor = processor;
3660 }
3661
3662
3663 if (processor->processor_meta != PROCESSOR_META_NULL &&
3664 !queue_empty(&processor->processor_meta->idle_queue)) {
3665 if (cpri < lowest_unpaired) {
3666 lowest_unpaired = cpri;
3667 lp_unpaired = processor;
3668 pmeta = processor->processor_meta;
3669 }
3670 else
3671 if (pmeta == PROCESSOR_META_NULL)
3672 pmeta = processor->processor_meta;
3673 }
3674 processor = (processor_t)queue_next((queue_entry_t)processor);
3675 }
3676
3677 if (thread->sched_pri > lowest_unpaired)
3678 return lp_unpaired;
3679
3680 if (pmeta != PROCESSOR_META_NULL)
3681 return ((processor_t)queue_first(&pmeta->idle_queue));
3682 if (thread->sched_pri > lowest_priority)
3683 return lp_processor;
3684 if (thread->realtime.deadline < furthest_deadline)
3685 return fd_processor;
3686
3687 processor = PROCESSOR_NULL;
3688 }
3689 else {
3690 /*
3691 * Check any hinted processors in the processor set if available.
3692 */
3693 if (cset->low_pri != PROCESSOR_NULL && cset->low_pri->state != PROCESSOR_INACTIVE &&
3694 cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE &&
3695 (processor == PROCESSOR_NULL ||
3696 (thread->sched_pri > BASEPRI_DEFAULT && cset->low_pri->current_pri < thread->sched_pri))) {
3697 processor = cset->low_pri;
3698 }
3699 else
3700 if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE &&
3701 cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE &&
3702 (processor == PROCESSOR_NULL || (thread->sched_pri <= BASEPRI_DEFAULT &&
3703 SCHED(processor_runq_count)(cset->low_count) < SCHED(processor_runq_count)(processor)))) {
3704 processor = cset->low_count;
3705 }
3706
3707 /*
3708 * Otherwise, choose an available processor in the set.
3709 */
3710 if (processor == PROCESSOR_NULL) {
3711 processor = (processor_t)dequeue_head(&cset->active_queue);
3712 if (processor != PROCESSOR_NULL)
3713 enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
3714 }
3715
3716 if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) {
3717 if (processor->processor_meta != PROCESSOR_META_NULL &&
3718 !queue_empty(&processor->processor_meta->idle_queue))
3719 pmeta = processor->processor_meta;
3720 }
3721 }
3722
3723 /*
3724 * Move onto the next processor set.
3725 */
3726 nset = next_pset(cset);
3727
3728 if (nset != pset) {
3729 pset_unlock(cset);
3730
3731 cset = nset;
3732 pset_lock(cset);
3733 }
3734 } while (nset != pset);
3735
3736 /*
3737 * Make sure that we pick a running processor,
3738 * and that the correct processor set is locked.
3739 */
3740 do {
3741 if (pmeta != PROCESSOR_META_NULL) {
3742 if (cset != pmeta->primary->processor_set) {
3743 pset_unlock(cset);
3744
3745 cset = pmeta->primary->processor_set;
3746 pset_lock(cset);
3747 }
3748
3749 if (!queue_empty(&pmeta->idle_queue))
3750 return ((processor_t)queue_first(&pmeta->idle_queue));
3751
3752 pmeta = PROCESSOR_META_NULL;
3753 }
3754
3755 /*
3756 * If we haven't been able to choose a processor,
3757 * pick the boot processor and return it.
3758 */
3759 if (processor == PROCESSOR_NULL) {
3760 processor = master_processor;
3761
3762 /*
3763 * Check that the correct processor set is
3764 * returned locked.
3765 */
3766 if (cset != processor->processor_set) {
3767 pset_unlock(cset);
3768
3769 cset = processor->processor_set;
3770 pset_lock(cset);
3771 }
3772
3773 return (processor);
3774 }
3775
3776 /*
3777 * Check that the processor set for the chosen
3778 * processor is locked.
3779 */
3780 if (cset != processor->processor_set) {
3781 pset_unlock(cset);
3782
3783 cset = processor->processor_set;
3784 pset_lock(cset);
3785 }
3786
3787 /*
3788 * We must verify that the chosen processor is still available.
3789 */
3790 if (processor->state == PROCESSOR_INACTIVE ||
3791 processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
3792 processor = PROCESSOR_NULL;
3793 } while (processor == PROCESSOR_NULL);
3794
3795 return (processor);
3796 }
3797
3798 /*
3799 * thread_setrun:
3800 *
3801 * Dispatch thread for execution, onto an idle
3802 * processor or run queue, and signal a preemption
3803 * as appropriate.
3804 *
3805 * Thread must be locked.
3806 */
3807 void
3808 thread_setrun(
3809 thread_t thread,
3810 integer_t options)
3811 {
3812 processor_t processor;
3813 processor_set_t pset;
3814
3815 #if DEBUG
3816 assert(thread_runnable(thread));
3817 #endif
3818
3819 /*
3820 * Update priority if needed.
3821 */
3822 if (SCHED(can_update_priority)(thread))
3823 SCHED(update_priority)(thread);
3824
3825 assert(thread->runq == PROCESSOR_NULL);
3826
3827 if (thread->bound_processor == PROCESSOR_NULL) {
3828 /*
3829 * Unbound case.
3830 */
3831 if (thread->affinity_set != AFFINITY_SET_NULL) {
3832 /*
3833 * Use affinity set policy hint.
3834 */
3835 pset = thread->affinity_set->aset_pset;
3836 pset_lock(pset);
3837
3838 processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
3839
3840 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
3841 (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
3842 }
3843 else
3844 if (thread->last_processor != PROCESSOR_NULL) {
3845 /*
3846 * Simple (last processor) affinity case.
3847 */
3848 processor = thread->last_processor;
3849 pset = processor->processor_set;
3850 pset_lock(pset);
3851 processor = SCHED(choose_processor)(pset, processor, thread);
3852
3853 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
3854 (uintptr_t)thread_tid(thread), thread->last_processor->cpu_id, processor->cpu_id, processor->state, 0);
3855 }
3856 else {
3857 /*
3858 * No Affinity case:
3859 *
3860 * Utilize a per-task hint to spread threads
3861 * among the available processor sets.
3862 */
3863 task_t task = thread->task;
3864
3865 pset = task->pset_hint;
3866 if (pset == PROCESSOR_SET_NULL)
3867 pset = current_processor()->processor_set;
3868
3869 pset = choose_next_pset(pset);
3870 pset_lock(pset);
3871
3872 processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
3873 task->pset_hint = processor->processor_set;
3874
3875 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
3876 (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
3877 }
3878 }
3879 else {
3880 /*
3881 * Bound case:
3882 *
3883 * Unconditionally dispatch on the processor.
3884 */
3885 processor = thread->bound_processor;
3886 pset = processor->processor_set;
3887 pset_lock(pset);
3888
3889 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
3890 (uintptr_t)thread_tid(thread), (uintptr_t)-2, processor->cpu_id, processor->state, 0);
3891 }
3892
3893 /*
3894 * Dispatch the thread on the chosen processor.
3895 */
3896 if (thread->sched_pri >= BASEPRI_RTQUEUES)
3897 realtime_setrun(processor, thread);
3898 else if (thread->sched_mode == TH_MODE_FAIRSHARE)
3899 fairshare_setrun(processor, thread);
3900 else
3901 processor_setrun(processor, thread, options);
3902 }
3903
3904 processor_set_t
3905 task_choose_pset(
3906 task_t task)
3907 {
3908 processor_set_t pset = task->pset_hint;
3909
3910 if (pset != PROCESSOR_SET_NULL)
3911 pset = choose_next_pset(pset);
3912
3913 return (pset);
3914 }
3915
3916 #if defined(CONFIG_SCHED_TRADITIONAL)
3917
3918 /*
3919 * processor_queue_shutdown:
3920 *
3921 * Shutdown a processor run queue by
3922 * re-dispatching non-bound threads.
3923 *
3924 * Associated pset must be locked, and is
3925 * returned unlocked.
3926 */
3927 void
3928 processor_queue_shutdown(
3929 processor_t processor)
3930 {
3931 processor_set_t pset = processor->processor_set;
3932 run_queue_t rq = runq_for_processor(processor);
3933 queue_t queue = rq->queues + rq->highq;
3934 int pri = rq->highq, count = rq->count;
3935 thread_t next, thread;
3936 queue_head_t tqueue;
3937
3938 queue_init(&tqueue);
3939
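	/*
	 * Collect every unbound thread onto a local queue while the pset
	 * lock is still held; the lock is then dropped and the collected
	 * threads are redispatched one at a time via thread_setrun().
	 * Bound threads are left on the processor's run queue.
	 */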
3940 while (count > 0) {
3941 thread = (thread_t)queue_first(queue);
3942 while (!queue_end(queue, (queue_entry_t)thread)) {
3943 next = (thread_t)queue_next((queue_entry_t)thread);
3944
3945 if (thread->bound_processor == PROCESSOR_NULL) {
3946 remqueue((queue_entry_t)thread);
3947
3948 thread->runq = PROCESSOR_NULL;
3949 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3950 runq_consider_decr_bound_count(processor, thread);
3951 rq->count--;
3952 if (SCHED(priority_is_urgent)(pri)) {
3953 rq->urgency--; assert(rq->urgency >= 0);
3954 }
3955 if (queue_empty(queue)) {
3956 if (pri != IDLEPRI)
3957 clrbit(MAXPRI - pri, rq->bitmap);
3958 rq->highq = MAXPRI - ffsbit(rq->bitmap);
3959 }
3960
3961 enqueue_tail(&tqueue, (queue_entry_t)thread);
3962 }
3963 count--;
3964
3965 thread = next;
3966 }
3967
3968 queue--; pri--;
3969 }
3970
3971 pset_unlock(pset);
3972
3973 while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
3974 thread_lock(thread);
3975
3976 thread_setrun(thread, SCHED_TAILQ);
3977
3978 thread_unlock(thread);
3979 }
3980 }
3981
3982 #endif /* CONFIG_SCHED_TRADITIONAL */
3983
3984 /*
3985 * Check for a preemption point in
3986 * the current context.
3987 *
3988 * Called at splsched.
3989 */
3990 ast_t
3991 csw_check(
3992 processor_t processor)
3993 {
3994 processor_set_t pset = processor->processor_set;
3995 ast_t result;
3996
3997 pset_lock(pset);
3998
3999 /* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */
4000 pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id);
4001
4002 result = csw_check_locked(processor, pset);
4003
4004 pset_unlock(pset);
4005
4006 return result;
4007 }
4008
4009 /*
4010 * Check for preemption at splsched with
4011 * pset locked
4012 */
4013 ast_t
4014 csw_check_locked(
4015 processor_t processor,
4016 processor_set_t pset __unused)
4017 {
4018 ast_t result = AST_NONE;
4019 thread_t thread = processor->active_thread;
4020
4021 if (first_timeslice(processor)) {
4022 if (rt_runq.count > 0)
4023 return (AST_PREEMPT | AST_URGENT);
4024 }
4025 else {
4026 if (rt_runq.count > 0) {
4027 if (BASEPRI_RTQUEUES > processor->current_pri)
4028 return (AST_PREEMPT | AST_URGENT);
4029 else
4030 return (AST_PREEMPT);
4031 }
4032 }
4033
4034 result = SCHED(processor_csw_check)(processor);
4035 if (result != AST_NONE)
4036 return (result);
4037
4038 if (SCHED(should_current_thread_rechoose_processor)(processor))
4039 return (AST_PREEMPT);
4040
4041 if (machine_processor_is_inactive(processor))
4042 return (AST_PREEMPT);
4043
4044 if (thread->state & TH_SUSP)
4045 return (AST_PREEMPT);
4046
4047 return (AST_NONE);
4048 }
4049
4050 /*
4051 * set_sched_pri:
4052 *
4053 * Set the scheduled priority of the specified thread.
4054 *
4055 * This may cause the thread to change queues.
4056 *
4057 * Thread must be locked.
4058 */
4059 void
4060 set_sched_pri(
4061 thread_t thread,
4062 int priority)
4063 {
4064 boolean_t removed = thread_run_queue_remove(thread);
4065
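	/*
	 * If the thread was sitting on a run queue it is re-enqueued below at
	 * the new priority.  If it is currently running, the processor's
	 * cached priority is refreshed and a preemption check is performed:
	 * locally for the current thread, or via an AST check for a thread
	 * running on another processor.
	 */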
4066 thread->sched_pri = priority;
4067 if (removed)
4068 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
4069 else
4070 if (thread->state & TH_RUN) {
4071 processor_t processor = thread->last_processor;
4072
4073 if (thread == current_thread()) {
4074 ast_t preempt;
4075
4076 processor->current_pri = priority;
4077 processor->current_thmode = thread->sched_mode;
4078 if ((preempt = csw_check(processor)) != AST_NONE)
4079 ast_on(preempt);
4080 }
4081 else
4082 if ( processor != PROCESSOR_NULL &&
4083 processor->active_thread == thread )
4084 cause_ast_check(processor);
4085 }
4086 }
4087
4088 #if 0
4089
4090 static void
4091 run_queue_check(
4092 run_queue_t rq,
4093 thread_t thread)
4094 {
4095 queue_t q;
4096 queue_entry_t qe;
4097
4098 if (rq != thread->runq)
4099 panic("run_queue_check: thread runq");
4100
4101 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
4102 panic("run_queue_check: thread sched_pri");
4103
4104 q = &rq->queues[thread->sched_pri];
4105 qe = queue_first(q);
4106 while (!queue_end(q, qe)) {
4107 if (qe == (queue_entry_t)thread)
4108 return;
4109
4110 qe = queue_next(qe);
4111 }
4112
4113 panic("run_queue_check: end");
4114 }
4115
4116 #endif /* 0 */
4117
4118 #if defined(CONFIG_SCHED_TRADITIONAL)
4119
4120 /* locks the runqueue itself */
4121
4122 static boolean_t
4123 processor_queue_remove(
4124 processor_t processor,
4125 thread_t thread)
4126 {
4127 void * rqlock;
4128 run_queue_t rq;
4129
4130 rqlock = &processor->processor_set->sched_lock;
4131 rq = runq_for_processor(processor);
4132
4133 simple_lock(rqlock);
4134 if (processor == thread->runq) {
4135 /*
4136 * Thread is on a run queue and we have a lock on
4137 * that run queue.
4138 */
4139 runq_consider_decr_bound_count(processor, thread);
4140 run_queue_remove(rq, thread);
4141 }
4142 else {
4143 /*
4144 * The thread left the run queue before we could
4145 * lock the run queue.
4146 */
4147 assert(thread->runq == PROCESSOR_NULL);
4148 processor = PROCESSOR_NULL;
4149 }
4150
4151 simple_unlock(rqlock);
4152
4153 return (processor != PROCESSOR_NULL);
4154 }
4155
4156 #endif /* CONFIG_SCHED_TRADITIONAL */
4157
4158 /*
4159 * thread_run_queue_remove:
4160 *
4161 * Remove a thread from a current run queue and
4162 * return TRUE if successful.
4163 *
4164 * Thread must be locked.
4165 */
4166 boolean_t
4167 thread_run_queue_remove(
4168 thread_t thread)
4169 {
4170 processor_t processor = thread->runq;
4171
4172 /*
4173 * If processor is PROCESSOR_NULL, the thread will stay out of the
4174 * run queues because the caller locked the thread. Otherwise
4175 * the thread is on a run queue, but could be chosen for dispatch
4176 * and removed.
4177 */
4178 if (processor != PROCESSOR_NULL) {
4179 queue_t q;
4180
4181 /*
4182 * The processor run queues are locked by the
4183 * processor set. Real-time priorities use a
4184 * global queue with a dedicated lock.
4185 */
4186 if (thread->sched_mode == TH_MODE_FAIRSHARE) {
4187 return SCHED(fairshare_queue_remove)(thread);
4188 }
4189
4190 if (thread->sched_pri < BASEPRI_RTQUEUES) {
4191 return SCHED(processor_queue_remove)(processor, thread);
4192 }
4193
4194 simple_lock(&rt_lock);
4195 q = &rt_runq.queue;
4196
4197 if (processor == thread->runq) {
4198 /*
4199 * Thread is on a run queue and we have a lock on
4200 * that run queue.
4201 */
4202 remqueue((queue_entry_t)thread);
4203 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
4204 rt_runq.count--;
4205
4206 thread->runq = PROCESSOR_NULL;
4207 }
4208 else {
4209 /*
4210 * The thread left the run queue before we could
4211 * lock the run queue.
4212 */
4213 assert(thread->runq == PROCESSOR_NULL);
4214 processor = PROCESSOR_NULL;
4215 }
4216
4217 simple_unlock(&rt_lock);
4218 }
4219
4220 return (processor != PROCESSOR_NULL);
4221 }
4222
4223 #if defined(CONFIG_SCHED_TRADITIONAL)
4224
4225 /*
4226 * steal_processor_thread:
4227 *
4228 * Locate a thread to steal from the processor and
4229 * return it.
4230 *
4231 * Associated pset must be locked. Returns THREAD_NULL
4232 * on failure.
4233 */
4234 static thread_t
4235 steal_processor_thread(
4236 processor_t processor)
4237 {
4238 run_queue_t rq = runq_for_processor(processor);
4239 queue_t queue = rq->queues + rq->highq;
4240 int pri = rq->highq, count = rq->count;
4241 thread_t thread;
4242
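	/*
	 * Scan from the highest-priority queue downward; the first unbound
	 * thread found is removed from the run queue and returned.
	 */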
4243 while (count > 0) {
4244 thread = (thread_t)queue_first(queue);
4245 while (!queue_end(queue, (queue_entry_t)thread)) {
4246 if (thread->bound_processor == PROCESSOR_NULL) {
4247 remqueue((queue_entry_t)thread);
4248
4249 thread->runq = PROCESSOR_NULL;
4250 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
4251 runq_consider_decr_bound_count(processor, thread);
4252 rq->count--;
4253 if (SCHED(priority_is_urgent)(pri)) {
4254 rq->urgency--; assert(rq->urgency >= 0);
4255 }
4256 if (queue_empty(queue)) {
4257 if (pri != IDLEPRI)
4258 clrbit(MAXPRI - pri, rq->bitmap);
4259 rq->highq = MAXPRI - ffsbit(rq->bitmap);
4260 }
4261
4262 return (thread);
4263 }
4264 count--;
4265
4266 thread = (thread_t)queue_next((queue_entry_t)thread);
4267 }
4268
4269 queue--; pri--;
4270 }
4271
4272 return (THREAD_NULL);
4273 }
4274
4275 /*
4276 * Locate and steal a thread, beginning
4277 * at the pset.
4278 *
4279 * The pset must be locked, and is returned
4280 * unlocked.
4281 *
4282 * Returns the stolen thread, or THREAD_NULL on
4283 * failure.
4284 */
4285 static thread_t
4286 steal_thread(
4287 processor_set_t pset)
4288 {
4289 processor_set_t nset, cset = pset;
4290 processor_t processor;
4291 thread_t thread;
4292
4293 do {
4294 processor = (processor_t)queue_first(&cset->active_queue);
4295 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
4296 if (runq_for_processor(processor)->count > 0) {
4297 thread = steal_processor_thread(processor);
4298 if (thread != THREAD_NULL) {
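				/*
				 * Requeue the victim processor at the tail of the
				 * active queue so that later scans start with a
				 * different processor.
				 */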
4299 remqueue((queue_entry_t)processor);
4300 enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
4301
4302 pset_unlock(cset);
4303
4304 return (thread);
4305 }
4306 }
4307
4308 processor = (processor_t)queue_next((queue_entry_t)processor);
4309 }
4310
4311 nset = next_pset(cset);
4312
4313 if (nset != pset) {
4314 pset_unlock(cset);
4315
4316 cset = nset;
4317 pset_lock(cset);
4318 }
4319 } while (nset != pset);
4320
4321 pset_unlock(cset);
4322
4323 return (THREAD_NULL);
4324 }
4325
4326 static thread_t steal_thread_disabled(
4327 processor_set_t pset)
4328 {
4329 pset_unlock(pset);
4330
4331 return (THREAD_NULL);
4332 }
4333
4334 #endif /* CONFIG_SCHED_TRADITIONAL */
4335
4336
4337 void
4338 sys_override_cpu_throttle(int flag)
4339 {
4340 if (flag == CPU_THROTTLE_ENABLE)
4341 cpu_throttle_enabled = 1;
4342 if (flag == CPU_THROTTLE_DISABLE)
4343 cpu_throttle_enabled = 0;
4344 }
4345
4346 int
4347 thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2)
4348 {
4349 if (thread == NULL || (thread->state & TH_IDLE)) {
4350 *arg1 = 0;
4351 *arg2 = 0;
4352
4353 return (THREAD_URGENCY_NONE);
4354 } else if (thread->sched_mode == TH_MODE_REALTIME) {
4355 *arg1 = thread->realtime.period;
4356 *arg2 = thread->realtime.deadline;
4357
4358 return (THREAD_URGENCY_REAL_TIME);
4359 } else if (cpu_throttle_enabled &&
4360 ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->priority <= MAXPRI_THROTTLE))) {
4361 /*
4362 * Background urgency is applied when both the thread's base and scheduled priorities are at or below MAXPRI_THROTTLE (i.e. the thread is not promoted)
4363 */
4364 *arg1 = thread->sched_pri;
4365 *arg2 = thread->priority;
4366
4367 return (THREAD_URGENCY_BACKGROUND);
4368 } else {
4369 *arg1 = thread->sched_pri;
4370 *arg2 = thread->priority;
4371
4372 return (THREAD_URGENCY_NORMAL);
4373 }
4374 }
4375
4376
4377 /*
4378 * This is the processor idle loop, which just looks for other threads
4379 * to execute. Dedicated processor idle threads invoke this without supplying
4380 * a current thread; a thread may also idle in place here without an asserted wait state.
4381 *
4382 * Returns the next thread to execute if one is dispatched directly.
4383 */
4384
4385 #if 0
4386 #define IDLE_KERNEL_DEBUG_CONSTANT(...) KERNEL_DEBUG_CONSTANT(__VA_ARGS__)
4387 #else
4388 #define IDLE_KERNEL_DEBUG_CONSTANT(...) do { } while(0)
4389 #endif
4390
4391 thread_t
4392 processor_idle(
4393 thread_t thread,
4394 processor_t processor)
4395 {
4396 processor_set_t pset = processor->processor_set;
4397 thread_t new_thread;
4398 int state;
4399 (void)splsched();
4400
4401 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4402 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START,
4403 (uintptr_t)thread_tid(thread), 0, 0, 0, 0);
4404
4405 SCHED_STATS_CPU_IDLE_START(processor);
4406
4407 timer_switch(&PROCESSOR_DATA(processor, system_state),
4408 mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
4409 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);
4410
4411 while (1) {
4412
4413 if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */
4414 break;
4415 if (pset->pending_AST_cpu_mask & (1U << processor->cpu_id))
4416 break;
4417 if (!SCHED(processor_queue_empty)(processor))
4418 break;
4419 if (rt_runq.count)
4420 break;
4421 #if CONFIG_SCHED_IDLE_IN_PLACE
4422 if (thread != THREAD_NULL) {
4423 /* Did the idle-in-place thread wake up? */
4424 if ((thread->state & (TH_WAIT|TH_SUSP)) != TH_WAIT || thread->wake_active)
4425 break;
4426 }
4427 #endif
4428
4429 IDLE_KERNEL_DEBUG_CONSTANT(
4430 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0);
4431
4432 machine_track_platform_idle(TRUE);
4433
4434 machine_idle();
4435
4436 machine_track_platform_idle(FALSE);
4437
4438 (void)splsched();
4439
4440 IDLE_KERNEL_DEBUG_CONSTANT(
4441 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -2, 0);
4442
4443 if (processor->state == PROCESSOR_INACTIVE && !machine_processor_is_inactive(processor))
4444 break;
4445 }
4446
4447 timer_switch(&PROCESSOR_DATA(processor, idle_state),
4448 mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
4449 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
4450
4451 pset_lock(pset);
4452
4453 /* If we were sent a remote AST and came out of idle, acknowledge it here with pset lock held */
4454 pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id);
4455
4456 state = processor->state;
4457 if (state == PROCESSOR_DISPATCHING) {
4458 /*
4459 * Common case -- cpu dispatched.
4460 */
4461 new_thread = processor->next_thread;
4462 processor->next_thread = THREAD_NULL;
4463 processor->state = PROCESSOR_RUNNING;
4464
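		/*
		 * If a thread of equal or higher urgency became runnable while
		 * we were coming out of idle, push the dispatched thread back
		 * onto a run queue and return THREAD_NULL so the caller
		 * re-selects.
		 */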
4465 if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE) ||
4466 (rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri)) ) {
4467 processor->current_pri = IDLEPRI;
4468 processor->current_thmode = TH_MODE_FIXED;
4469 processor->deadline = UINT64_MAX;
4470
4471 pset_unlock(pset);
4472
4473 thread_lock(new_thread);
4474 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REDISPATCH), (uintptr_t)thread_tid(new_thread), new_thread->sched_pri, rt_runq.count, 0, 0);
4475 thread_setrun(new_thread, SCHED_HEADQ);
4476 thread_unlock(new_thread);
4477
4478 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4479 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4480 (uintptr_t)thread_tid(thread), state, 0, 0, 0);
4481
4482 return (THREAD_NULL);
4483 }
4484
4485 pset_unlock(pset);
4486
4487 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4488 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4489 (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
4490
4491 return (new_thread);
4492 }
4493 else
4494 if (state == PROCESSOR_IDLE) {
4495 remqueue((queue_entry_t)processor);
4496
4497 processor->state = PROCESSOR_RUNNING;
4498 processor->current_pri = IDLEPRI;
4499 processor->current_thmode = TH_MODE_FIXED;
4500 processor->deadline = UINT64_MAX;
4501 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
4502 }
4503 else
4504 if (state == PROCESSOR_INACTIVE) {
4505 processor->state = PROCESSOR_RUNNING;
4506 enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
4507 }
4508 else
4509 if (state == PROCESSOR_SHUTDOWN) {
4510 /*
4511 * Going off-line. Force a
4512 * reschedule.
4513 */
4514 if ((new_thread = processor->next_thread) != THREAD_NULL) {
4515 processor->next_thread = THREAD_NULL;
4516 processor->current_pri = IDLEPRI;
4517 processor->current_thmode = TH_MODE_FIXED;
4518 processor->deadline = UINT64_MAX;
4519
4520 pset_unlock(pset);
4521
4522 thread_lock(new_thread);
4523 thread_setrun(new_thread, SCHED_HEADQ);
4524 thread_unlock(new_thread);
4525
4526 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4527 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4528 (uintptr_t)thread_tid(thread), state, 0, 0, 0);
4529
4530 return (THREAD_NULL);
4531 }
4532 }
4533
4534 pset_unlock(pset);
4535
4536 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4537 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4538 (uintptr_t)thread_tid(thread), state, 0, 0, 0);
4539
4540 return (THREAD_NULL);
4541 }
4542
4543 /*
4544 * Each processor has a dedicated thread which
4545 * executes the idle loop when there is no suitable
4546 * previous context.
4547 */
4548 void
4549 idle_thread(void)
4550 {
4551 processor_t processor = current_processor();
4552 thread_t new_thread;
4553
4554 new_thread = processor_idle(THREAD_NULL, processor);
4555 if (new_thread != THREAD_NULL) {
4556 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
4557 /*NOTREACHED*/
4558 }
4559
4560 thread_block((thread_continue_t)idle_thread);
4561 /*NOTREACHED*/
4562 }
4563
4564 kern_return_t
4565 idle_thread_create(
4566 processor_t processor)
4567 {
4568 kern_return_t result;
4569 thread_t thread;
4570 spl_t s;
4571
4572 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
4573 if (result != KERN_SUCCESS)
4574 return (result);
4575
4576 s = splsched();
4577 thread_lock(thread);
4578 thread->bound_processor = processor;
4579 processor->idle_thread = thread;
4580 thread->sched_pri = thread->priority = IDLEPRI;
4581 thread->state = (TH_RUN | TH_IDLE);
4582 thread->options |= TH_OPT_IDLE_THREAD;
4583 thread_unlock(thread);
4584 splx(s);
4585
4586 thread_deallocate(thread);
4587
4588 return (KERN_SUCCESS);
4589 }
4590
4591 /*
4592 * sched_startup:
4593 *
4594 * Kicks off scheduler services.
4595 *
4596 * Called at splsched.
4597 */
4598 void
4599 sched_startup(void)
4600 {
4601 kern_return_t result;
4602 thread_t thread;
4603
4604 result = kernel_thread_start_priority((thread_continue_t)sched_init_thread,
4605 (void *)SCHED(maintenance_continuation), MAXPRI_KERNEL, &thread);
4606 if (result != KERN_SUCCESS)
4607 panic("sched_startup");
4608
4609 thread_deallocate(thread);
4610
4611 /*
4612 * Yield to the sched_init_thread once, to
4613 * initialize our own thread after being switched
4614 * back to.
4615 *
4616 * The current thread is the only other thread
4617 * active at this point.
4618 */
4619 thread_block(THREAD_CONTINUE_NULL);
4620 }
4621
4622 #if defined(CONFIG_SCHED_TRADITIONAL)
4623
4624 static volatile uint64_t sched_maintenance_deadline;
4625 static uint64_t sched_tick_last_abstime;
4626 static uint64_t sched_tick_delta;
4627 uint64_t sched_tick_max_delta;
4628 /*
4629 * sched_traditional_maintenance_continue:
4630 *
4631 * Perform periodic bookkeeping functions about ten
4632 * times per second.
4633 */
4634 static void
4635 sched_traditional_maintenance_continue(void)
4636 {
4637 uint64_t sched_tick_ctime;
4638 sched_tick_ctime = mach_absolute_time();
4639
4640 if (__improbable(sched_tick_last_abstime == 0)) {
4641 sched_tick_last_abstime = sched_tick_ctime;
4642 sched_tick_delta = 1;
4643 } else {
4644 sched_tick_delta = (sched_tick_ctime - sched_tick_last_abstime) / sched_tick_interval;
4645 /* Ensure a minimum delta of 1, since the measured interval could be
4646 * slightly smaller than sched_tick_interval due to dispatch
4647 * latencies.
4648 */
4649 sched_tick_delta = MAX(sched_tick_delta, 1);
4650
4651 /* In the event interrupt latencies or platform
4652 * idle events that advanced the timebase resulted
4653 * in periods where no threads were dispatched,
4654 * cap the maximum "tick delta" at SCHED_TICK_MAX_DELTA
4655 * iterations.
4656 */
4657 sched_tick_delta = MIN(sched_tick_delta, SCHED_TICK_MAX_DELTA);
4658
4659 sched_tick_last_abstime = sched_tick_ctime;
4660 sched_tick_max_delta = MAX(sched_tick_delta, sched_tick_max_delta);
4661 }
4662
4663 /* Add a number of pseudo-ticks corresponding to the elapsed interval.
4664 * This could be greater than 1 when substantial intervals pass with
4665 * all processors idle, which is rare in practice.
4666 */
4667
4668 sched_tick += sched_tick_delta;
4669
4670 /*
4671 * Compute various averages.
4672 */
4673 compute_averages(sched_tick_delta);
4674
4675 /*
4676 * Scan the run queues for threads which
4677 * may need to be updated.
4678 */
4679 thread_update_scan();
4680
4681 assert_wait((event_t)sched_traditional_maintenance_continue, THREAD_UNINT);
4682 thread_block((thread_continue_t)sched_traditional_maintenance_continue);
4683 /*NOTREACHED*/
4684 }
4685
4686 static uint64_t sched_maintenance_wakeups;
4687
4688 /*
4689 * Determine if the set of routines formerly driven by a maintenance timer
4690 * must be invoked, based on a deadline comparison. Signals the scheduler
4691 * maintenance thread on deadline expiration. Must be invoked at an interval
4692 * lower than the "sched_tick_interval", currently accomplished by
4693 * invocation via the quantum expiration timer and at context switch time.
4694 * Performance matters: this routine reuses a timestamp approximating the
4695 * current absolute time received from the caller, and should perform
4696 * no more than a comparison against the deadline in the common case.
4697 */
4698 void
4699 sched_traditional_consider_maintenance(uint64_t ctime) {
4700 uint64_t ndeadline, deadline = sched_maintenance_deadline;
4701
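	/*
	 * Only one caller can successfully advance the deadline via the
	 * compare-and-swap below; that caller also issues the wakeup, so the
	 * maintenance thread is signalled at most once per interval.
	 */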
4702 if (__improbable(ctime >= deadline)) {
4703 if (__improbable(current_thread() == sched_maintenance_thread))
4704 return;
4705 OSMemoryBarrier();
4706
4707 ndeadline = ctime + sched_tick_interval;
4708
4709 if (__probable(__sync_bool_compare_and_swap(&sched_maintenance_deadline, deadline, ndeadline))) {
4710 thread_wakeup((event_t)sched_traditional_maintenance_continue);
4711 sched_maintenance_wakeups++;
4712 }
4713 }
4714 }
4715
4716 #endif /* CONFIG_SCHED_TRADITIONAL */
4717
4718 void
4719 sched_init_thread(void (*continuation)(void))
4720 {
4721 thread_block(THREAD_CONTINUE_NULL);
4722
4723 sched_maintenance_thread = current_thread();
4724 continuation();
4725
4726 /*NOTREACHED*/
4727 }
4728
4729 #if defined(CONFIG_SCHED_TRADITIONAL)
4730
4731 /*
4732 * thread_update_scan / runq_scan:
4733 *
4734 * Scan the run queues to account for timesharing threads
4735 * which need to be updated.
4736 *
4737 * Scanner runs in two passes. Pass one squirrels likely
4738 * threads away in an array, pass two does the update.
4739 *
4740 * This is necessary because the run queue is locked for
4741 * the candidate scan, but the thread is locked for the update.
4742 *
4743 * Array should be sized to make forward progress, without
4744 * disabling preemption for long periods.
4745 */
4746
4747 #define THREAD_UPDATE_SIZE 128
4748
4749 static thread_t thread_update_array[THREAD_UPDATE_SIZE];
4750 static int thread_update_count = 0;
4751
4752 /*
4753 * Scan a runq for candidate threads.
4754 *
4755 * Returns TRUE if retry is needed.
4756 */
4757 static boolean_t
4758 runq_scan(
4759 run_queue_t runq)
4760 {
4761 register int count;
4762 register queue_t q;
4763 register thread_t thread;
4764
4765 if ((count = runq->count) > 0) {
4766 q = runq->queues + runq->highq;
4767 while (count > 0) {
4768 queue_iterate(q, thread, thread_t, links) {
4769 if ( thread->sched_stamp != sched_tick &&
4770 (thread->sched_mode == TH_MODE_TIMESHARE) ) {
4771 if (thread_update_count == THREAD_UPDATE_SIZE)
4772 return (TRUE);
4773
4774 thread_update_array[thread_update_count++] = thread;
4775 thread_reference_internal(thread);
4776 }
4777
4778 count--;
4779 }
4780
4781 q--;
4782 }
4783 }
4784
4785 return (FALSE);
4786 }
4787
4788 static void
4789 thread_update_scan(void)
4790 {
4791 boolean_t restart_needed = FALSE;
4792 processor_t processor = processor_list;
4793 processor_set_t pset;
4794 thread_t thread;
4795 spl_t s;
4796
4797 do {
4798 do {
4799 pset = processor->processor_set;
4800
4801 s = splsched();
4802 pset_lock(pset);
4803
4804 restart_needed = runq_scan(runq_for_processor(processor));
4805
4806 pset_unlock(pset);
4807 splx(s);
4808
4809 if (restart_needed)
4810 break;
4811
4812 thread = processor->idle_thread;
4813 if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
4814 if (thread_update_count == THREAD_UPDATE_SIZE) {
4815 restart_needed = TRUE;
4816 break;
4817 }
4818
4819 thread_update_array[thread_update_count++] = thread;
4820 thread_reference_internal(thread);
4821 }
4822 } while ((processor = processor->processor_list) != NULL);
4823
4824 /*
4825 * Ok, we now have a collection of candidates -- fix them.
4826 */
4827 while (thread_update_count > 0) {
4828 thread = thread_update_array[--thread_update_count];
4829 thread_update_array[thread_update_count] = THREAD_NULL;
4830
4831 s = splsched();
4832 thread_lock(thread);
4833 if ( !(thread->state & (TH_WAIT)) ) {
4834 if (SCHED(can_update_priority)(thread))
4835 SCHED(update_priority)(thread);
4836 }
4837 thread_unlock(thread);
4838 splx(s);
4839
4840 thread_deallocate(thread);
4841 }
4842 } while (restart_needed);
4843 }
4844
4845 #endif /* CONFIG_SCHED_TRADITIONAL */
4846
4847 boolean_t
4848 thread_eager_preemption(thread_t thread)
4849 {
4850 return ((thread->sched_flags & TH_SFLAG_EAGERPREEMPT) != 0);
4851 }
4852
4853 void
4854 thread_set_eager_preempt(thread_t thread)
4855 {
4856 spl_t x;
4857 processor_t p;
4858 ast_t ast = AST_NONE;
4859
4860 x = splsched();
4861 p = current_processor();
4862
4863 thread_lock(thread);
4864 thread->sched_flags |= TH_SFLAG_EAGERPREEMPT;
4865
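	/*
	 * If the flag was set on the current thread, check for and take any
	 * pending preemption immediately; otherwise poke the processor the
	 * thread was last seen running on so it re-evaluates via an AST check.
	 */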
4866 if (thread == current_thread()) {
4867 thread_unlock(thread);
4868
4869 ast = csw_check(p);
4870 if (ast != AST_NONE) {
4871 (void) thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
4872 }
4873 } else {
4874 p = thread->last_processor;
4875
4876 if (p != PROCESSOR_NULL && p->state == PROCESSOR_RUNNING &&
4877 p->active_thread == thread) {
4878 cause_ast_check(p);
4879 }
4880
4881 thread_unlock(thread);
4882 }
4883
4884 splx(x);
4885 }
4886
4887 void
4888 thread_clear_eager_preempt(thread_t thread)
4889 {
4890 spl_t x;
4891
4892 x = splsched();
4893 thread_lock(thread);
4894
4895 thread->sched_flags &= ~TH_SFLAG_EAGERPREEMPT;
4896
4897 thread_unlock(thread);
4898 splx(x);
4899 }
4900 /*
4901 * Scheduling statistics
4902 */
4903 void
4904 sched_stats_handle_csw(processor_t processor, int reasons, int selfpri, int otherpri)
4905 {
4906 struct processor_sched_statistics *stats;
4907 boolean_t to_realtime = FALSE;
4908
4909 stats = &processor->processor_data.sched_stats;
4910 stats->csw_count++;
4911
4912 if (otherpri >= BASEPRI_REALTIME) {
4913 stats->rt_sched_count++;
4914 to_realtime = TRUE;
4915 }
4916
4917 if ((reasons & AST_PREEMPT) != 0) {
4918 stats->preempt_count++;
4919
4920 if (selfpri >= BASEPRI_REALTIME) {
4921 stats->preempted_rt_count++;
4922 }
4923
4924 if (to_realtime) {
4925 stats->preempted_by_rt_count++;
4926 }
4927
4928 }
4929 }
4930
4931 void
4932 sched_stats_handle_runq_change(struct runq_stats *stats, int old_count)
4933 {
4934 uint64_t timestamp = mach_absolute_time();
4935
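	/*
	 * Accumulate a time-weighted sum of run-queue depth: the previous
	 * depth is weighted by the time it was in effect, so an average depth
	 * can later be derived by dividing by total elapsed time.
	 */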
4936 stats->count_sum += (timestamp - stats->last_change_timestamp) * old_count;
4937 stats->last_change_timestamp = timestamp;
4938 }
4939
4940 /*
4941 * For calls from assembly code
4942 */
4943 #undef thread_wakeup
4944 void
4945 thread_wakeup(
4946 event_t x);
4947
4948 void
4949 thread_wakeup(
4950 event_t x)
4951 {
4952 thread_wakeup_with_result(x, THREAD_AWAKENED);
4953 }
4954
4955 boolean_t
4956 preemption_enabled(void)
4957 {
4958 return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
4959 }
4960
4961 #if DEBUG
4962 static boolean_t
4963 thread_runnable(
4964 thread_t thread)
4965 {
4966 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
4967 }
4968 #endif /* DEBUG */
4969
4970 static void
4971 sched_timer_deadline_tracking_init(void) {
4972 nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1);
4973 nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2);
4974 }