/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr.
 *
 *	Scheduling primitives
 *
 */
#include <mach/mach_types.h>
#include <mach/machine.h>
#include <mach/policy.h>
#include <mach/sync_policy.h>
#include <mach/thread_act.h>

#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <machine/machlimits.h>

#ifdef CONFIG_MACH_APPROXIMATE_TIME
#include <machine/commpage.h>
#endif

#include <kern/kern_types.h>
#include <kern/clock.h>
#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/debug.h>
#include <kern/macro_help.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/wait_queue.h>
#include <kern/ledger.h>
#include <kern/timer_queue.h>

#include <vm/vm_kern.h>
#include <vm/vm_map.h>

#include <mach/sdt.h>

#include <sys/kdebug.h>

#include <kern/pms.h>

#if defined(CONFIG_TELEMETRY) && defined(CONFIG_SCHED_TIMESHARE_CORE)
#include <kern/telemetry.h>
#endif
struct rt_queue	rt_runq;
#define RT_RUNQ		((processor_t)-1)
decl_simple_lock_data(static,rt_lock);

#if defined(CONFIG_SCHED_FAIRSHARE_CORE)
static struct fairshare_queue	fs_runq;
#define FS_RUNQ		((processor_t)-2)
decl_simple_lock_data(static,fs_lock);
#endif /* CONFIG_SCHED_FAIRSHARE_CORE */

#define	DEFAULT_PREEMPTION_RATE		100		/* (1/s) */
int	default_preemption_rate = DEFAULT_PREEMPTION_RATE;

#define	DEFAULT_BG_PREEMPTION_RATE	400		/* (1/s) */
int	default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;

#define	MAX_UNSAFE_QUANTA		800
int	max_unsafe_quanta = MAX_UNSAFE_QUANTA;

#define	MAX_POLL_QUANTA			2
int	max_poll_quanta = MAX_POLL_QUANTA;

#define	SCHED_POLL_YIELD_SHIFT		4		/* 1/16 */
int	sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;

uint64_t	max_poll_computation;

uint64_t	max_unsafe_computation;
uint64_t	sched_safe_duration;
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

uint32_t	std_quantum;
uint32_t	min_std_quantum;
uint32_t	bg_quantum;

uint32_t	std_quantum_us;
uint32_t	bg_quantum_us;

#endif /* CONFIG_SCHED_TIMESHARE_CORE */

uint32_t	thread_depress_time;
uint32_t	default_timeshare_computation;
uint32_t	default_timeshare_constraint;

uint32_t	max_rt_quantum;
uint32_t	min_rt_quantum;

#if defined(CONFIG_SCHED_TIMESHARE_CORE)

uint32_t	sched_tick_interval;
#if defined(CONFIG_TELEMETRY)
uint32_t	sched_telemetry_interval;
#endif /* CONFIG_TELEMETRY */

uint32_t	sched_pri_shift = INT8_MAX;
uint32_t	sched_background_pri_shift = INT8_MAX;
uint32_t	sched_combined_fgbg_pri_shift = INT8_MAX;
uint32_t	sched_fixed_shift;
uint32_t	sched_use_combined_fgbg_decay = 0;

uint32_t	sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */

/* Allow foreground to decay past default to resolve inversions */
#define DEFAULT_DECAY_BAND_LIMIT ((BASEPRI_FOREGROUND - BASEPRI_DEFAULT) + 2)
int		sched_pri_decay_band_limit = DEFAULT_DECAY_BAND_LIMIT;

/* Defaults for timer deadline profiling */
#define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT	2000000	/* Timers with deadlines <= 2ms */
#define TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT	5000000	/* Timers with deadlines <= 5ms */

uint64_t	timer_deadline_tracking_bin_1;
uint64_t	timer_deadline_tracking_bin_2;

thread_t	sched_maintenance_thread;

#endif /* CONFIG_SCHED_TIMESHARE_CORE */
#if defined(CONFIG_SCHED_TRADITIONAL)

static boolean_t	sched_traditional_use_pset_runqueue = FALSE;

__attribute__((always_inline))
static inline run_queue_t runq_for_processor(processor_t processor)
{
	if (sched_traditional_use_pset_runqueue)
		return &processor->processor_set->pset_runq;
	else
		return &processor->runq;
}

__attribute__((always_inline))
static inline void runq_consider_incr_bound_count(processor_t processor,
						   thread_t thread)
{
	if (thread->bound_processor == PROCESSOR_NULL)
		return;

	assert(thread->bound_processor == processor);

	if (sched_traditional_use_pset_runqueue)
		processor->processor_set->pset_runq_bound_count++;

	processor->runq_bound_count++;
}

__attribute__((always_inline))
static inline void runq_consider_decr_bound_count(processor_t processor,
						   thread_t thread)
{
	if (thread->bound_processor == PROCESSOR_NULL)
		return;

	assert(thread->bound_processor == processor);

	if (sched_traditional_use_pset_runqueue)
		processor->processor_set->pset_runq_bound_count--;

	processor->runq_bound_count--;
}

#endif /* CONFIG_SCHED_TRADITIONAL */
uint64_t	sched_one_second_interval;

uint32_t	sched_run_count, sched_share_count, sched_background_count;
uint32_t	sched_load_average, sched_mach_factor;

/* Forwards */

#if defined(CONFIG_SCHED_TIMESHARE_CORE)

static void load_shift_init(void);
static void preempt_pri_init(void);

#endif /* CONFIG_SCHED_TIMESHARE_CORE */
static thread_t	thread_select(
			thread_t		thread,
			processor_t		processor,
			ast_t			reason);

#if CONFIG_SCHED_IDLE_IN_PLACE
static thread_t	thread_select_idle(
			thread_t		thread,
			processor_t		processor);
#endif

thread_t	processor_idle(
			thread_t		thread,
			processor_t		processor);

ast_t
csw_check_locked(	processor_t		processor,
			processor_set_t		pset,
			ast_t			check_reason);

#if defined(CONFIG_SCHED_TRADITIONAL)

static thread_t	steal_thread(
			processor_set_t		pset);

static thread_t	steal_thread_disabled(
			processor_set_t		pset) __attribute__((unused));

static thread_t	steal_processor_thread(
			processor_t		processor);

static void	thread_update_scan(void);

static void	processor_setrun(
			processor_t		processor,
			thread_t		thread,
			integer_t		options);

static boolean_t	processor_enqueue(
			processor_t		processor,
			thread_t		thread,
			integer_t		options);

static boolean_t	processor_queue_remove(
			processor_t		processor,
			thread_t		thread);

static boolean_t	processor_queue_empty(processor_t processor);

static ast_t		processor_csw_check(processor_t processor);

static boolean_t	processor_queue_has_priority(processor_t processor,
			int			priority,
			boolean_t		gte);

static boolean_t	should_current_thread_rechoose_processor(processor_t processor);

static int		sched_traditional_processor_runq_count(processor_t processor);

static boolean_t	sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor);

static uint64_t		sched_traditional_processor_runq_stats_count_sum(processor_t processor);

static uint64_t		sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor);

static int		sched_traditional_processor_bound_count(processor_t processor);

#endif /* CONFIG_SCHED_TRADITIONAL */

#if defined(CONFIG_SCHED_TRADITIONAL)

static void
sched_traditional_processor_init(processor_t processor);

static void
sched_traditional_pset_init(processor_set_t pset);

static void
sched_traditional_with_pset_runqueue_init(void);

#endif /* CONFIG_SCHED_TRADITIONAL */

static void
sched_realtime_init(void);

static void
sched_realtime_timebase_init(void);

static void
sched_timer_deadline_tracking_init(void);

#if defined(CONFIG_SCHED_TRADITIONAL)

static sched_mode_t
sched_traditional_initial_thread_sched_mode(task_t parent_task);

static thread_t
sched_traditional_choose_thread(
			processor_t		processor,
			int			priority,
	__unused	ast_t			reason);

#endif /* CONFIG_SCHED_TRADITIONAL */

#if	DEBUG
extern int debug_task;
#define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
#else
#define TLOG(a, fmt, args...) do {} while (0)
#endif

static boolean_t
thread_runnable(
	thread_t		thread);
/*
 *	State machine
 *
 *	states are combinations of:
 *	  R	running
 *	  W	waiting (or on wait queue)
 *	  N	non-interruptible
 *
 *	init	action
 *		assert_wait	thread_block	clear_wait	swapout	swapin
 *
 *	R	RW, RWN		R; setrun	-		-	-
 *	RN	RWN		RN; setrun	-		-	-
 */
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
int8_t		sched_load_shifts[NRQS];
int		sched_preempt_pri[NRQBM];
#endif /* CONFIG_SCHED_TIMESHARE_CORE */
#if defined(CONFIG_SCHED_TRADITIONAL)

const struct sched_dispatch_table sched_traditional_dispatch = {
	.init					= sched_traditional_init,
	.timebase_init				= sched_traditional_timebase_init,
	.processor_init				= sched_traditional_processor_init,
	.pset_init				= sched_traditional_pset_init,
	.maintenance_continuation		= sched_traditional_maintenance_continue,
	.choose_thread				= sched_traditional_choose_thread,
	.steal_thread				= steal_thread,
	.compute_priority			= compute_priority,
	.choose_processor			= choose_processor,
	.processor_enqueue			= processor_enqueue,
	.processor_queue_shutdown		= processor_queue_shutdown,
	.processor_queue_remove			= processor_queue_remove,
	.processor_queue_empty			= processor_queue_empty,
	.priority_is_urgent			= priority_is_urgent,
	.processor_csw_check			= processor_csw_check,
	.processor_queue_has_priority		= processor_queue_has_priority,
	.initial_quantum_size			= sched_traditional_initial_quantum_size,
	.initial_thread_sched_mode		= sched_traditional_initial_thread_sched_mode,
	.can_update_priority			= can_update_priority,
	.update_priority			= update_priority,
	.lightweight_update_priority		= lightweight_update_priority,
	.quantum_expire				= sched_traditional_quantum_expire,
	.should_current_thread_rechoose_processor = should_current_thread_rechoose_processor,
	.processor_runq_count			= sched_traditional_processor_runq_count,
	.processor_runq_stats_count_sum		= sched_traditional_processor_runq_stats_count_sum,
	.fairshare_init				= sched_traditional_fairshare_init,
	.fairshare_runq_count			= sched_traditional_fairshare_runq_count,
	.fairshare_runq_stats_count_sum		= sched_traditional_fairshare_runq_stats_count_sum,
	.fairshare_enqueue			= sched_traditional_fairshare_enqueue,
	.fairshare_dequeue			= sched_traditional_fairshare_dequeue,
	.fairshare_queue_remove			= sched_traditional_fairshare_queue_remove,
	.processor_bound_count			= sched_traditional_processor_bound_count,
	.thread_update_scan			= thread_update_scan,
	.direct_dispatch_to_idle_processors	= TRUE,
};

const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = {
	.init					= sched_traditional_with_pset_runqueue_init,
	.timebase_init				= sched_traditional_timebase_init,
	.processor_init				= sched_traditional_processor_init,
	.pset_init				= sched_traditional_pset_init,
	.maintenance_continuation		= sched_traditional_maintenance_continue,
	.choose_thread				= sched_traditional_choose_thread,
	.steal_thread				= steal_thread,
	.compute_priority			= compute_priority,
	.choose_processor			= choose_processor,
	.processor_enqueue			= processor_enqueue,
	.processor_queue_shutdown		= processor_queue_shutdown,
	.processor_queue_remove			= processor_queue_remove,
	.processor_queue_empty			= sched_traditional_with_pset_runqueue_processor_queue_empty,
	.priority_is_urgent			= priority_is_urgent,
	.processor_csw_check			= processor_csw_check,
	.processor_queue_has_priority		= processor_queue_has_priority,
	.initial_quantum_size			= sched_traditional_initial_quantum_size,
	.initial_thread_sched_mode		= sched_traditional_initial_thread_sched_mode,
	.can_update_priority			= can_update_priority,
	.update_priority			= update_priority,
	.lightweight_update_priority		= lightweight_update_priority,
	.quantum_expire				= sched_traditional_quantum_expire,
	.should_current_thread_rechoose_processor = should_current_thread_rechoose_processor,
	.processor_runq_count			= sched_traditional_processor_runq_count,
	.processor_runq_stats_count_sum		= sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum,
	.fairshare_init				= sched_traditional_fairshare_init,
	.fairshare_runq_count			= sched_traditional_fairshare_runq_count,
	.fairshare_runq_stats_count_sum		= sched_traditional_fairshare_runq_stats_count_sum,
	.fairshare_enqueue			= sched_traditional_fairshare_enqueue,
	.fairshare_dequeue			= sched_traditional_fairshare_dequeue,
	.fairshare_queue_remove			= sched_traditional_fairshare_queue_remove,
	.processor_bound_count			= sched_traditional_processor_bound_count,
	.thread_update_scan			= thread_update_scan,
	.direct_dispatch_to_idle_processors	= FALSE,
};

#endif /* CONFIG_SCHED_TRADITIONAL */

const struct sched_dispatch_table *sched_current_dispatch = NULL;
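
/*
 * Editorial note (not part of the original source): the SCHED() macro used
 * throughout this file, e.g. SCHED(choose_thread)(processor, MINPRI, reason),
 * indirects through the dispatch table selected above. To the best of our
 * knowledge it is defined in the scheduler headers roughly as
 *
 *     #define SCHED(f) (sched_current_dispatch->f)
 *
 * so every SCHED(...) call below resolves to the algorithm chosen at boot.
 */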
/*
 * Statically allocate a buffer to hold the longest possible
 * scheduler description string, as currently implemented.
 * bsd/kern/kern_sysctl.c has a corresponding definition in bsd/
 * to export to userspace via sysctl(3). If either version
 * changes, update the other.
 *
 * Note that in addition to being an upper bound on the strings
 * in the kernel, it's also an exact parameter to PE_get_default(),
 * which interrogates the device tree on some platforms. That
 * API requires the caller know the exact size of the device tree
 * property, so we need both a legacy size (32) and the current size
 * (48) to deal with old and new device trees. The device tree property
 * is similarly padded to a fixed size so that the same kernel image
 * can run on multiple devices with different schedulers configured
 * in the device tree.
 */
#define SCHED_STRING_MAX_LENGTH (48)
char sched_string[SCHED_STRING_MAX_LENGTH];
static enum sched_enum _sched_enum __attribute__((used)) = sched_enum_unknown;

/* Global flag which indicates whether Background Stepper Context is enabled */
static int cpu_throttle_enabled = 1;

void
sched_init(void)
{
	char sched_arg[SCHED_STRING_MAX_LENGTH] = { '\0' };
	/* Check for runtime selection of the scheduler algorithm */
	if (!PE_parse_boot_argn("sched", sched_arg, sizeof (sched_arg))) {
		/* If no boot-args override, look in device tree */
		if (!PE_get_default("kern.sched", sched_arg,
				    SCHED_STRING_MAX_LENGTH)) {
			sched_arg[0] = '\0';
		}
	}

	if (!PE_parse_boot_argn("sched_pri_decay_limit", &sched_pri_decay_band_limit, sizeof(sched_pri_decay_band_limit))) {
		/* No boot-args, check in device tree */
		if (!PE_get_default("kern.sched_pri_decay_limit",
				    &sched_pri_decay_band_limit,
				    sizeof(sched_pri_decay_band_limit))) {
			/* Allow decay all the way to normal limits */
			sched_pri_decay_band_limit = DEFAULT_DECAY_BAND_LIMIT;
		}
	}

	kprintf("Setting scheduler priority decay band limit %d\n", sched_pri_decay_band_limit);
	if (strlen(sched_arg) > 0) {
		if (0) {
			/* Allow pattern below */
#if defined(CONFIG_SCHED_TRADITIONAL)
		} else if (0 == strcmp(sched_arg, kSchedTraditionalString)) {
			sched_current_dispatch = &sched_traditional_dispatch;
			_sched_enum = sched_enum_traditional;
			strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string));
		} else if (0 == strcmp(sched_arg, kSchedTraditionalWithPsetRunqueueString)) {
			sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
			_sched_enum = sched_enum_traditional_with_pset_runqueue;
			strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
#endif
#if defined(CONFIG_SCHED_PROTO)
		} else if (0 == strcmp(sched_arg, kSchedProtoString)) {
			sched_current_dispatch = &sched_proto_dispatch;
			_sched_enum = sched_enum_proto;
			strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
#endif
#if defined(CONFIG_SCHED_GRRR)
		} else if (0 == strcmp(sched_arg, kSchedGRRRString)) {
			sched_current_dispatch = &sched_grrr_dispatch;
			_sched_enum = sched_enum_grrr;
			strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
#endif
#if defined(CONFIG_SCHED_MULTIQ)
		} else if (0 == strcmp(sched_arg, kSchedMultiQString)) {
			sched_current_dispatch = &sched_multiq_dispatch;
			_sched_enum = sched_enum_multiq;
			strlcpy(sched_string, kSchedMultiQString, sizeof(sched_string));
		} else if (0 == strcmp(sched_arg, kSchedDualQString)) {
			sched_current_dispatch = &sched_dualq_dispatch;
			_sched_enum = sched_enum_dualq;
			strlcpy(sched_string, kSchedDualQString, sizeof(sched_string));
#endif
		} else {
#if defined(CONFIG_SCHED_TRADITIONAL)
			printf("Unrecognized scheduler algorithm: %s\n", sched_arg);
			printf("Scheduler: Using instead: %s\n", kSchedTraditionalWithPsetRunqueueString);

			sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
			_sched_enum = sched_enum_traditional_with_pset_runqueue;
			strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
#else
			panic("Unrecognized scheduler algorithm: %s", sched_arg);
#endif
		}
		kprintf("Scheduler: Runtime selection of %s\n", sched_string);
	} else {
#if   defined(CONFIG_SCHED_MULTIQ)
		sched_current_dispatch = &sched_multiq_dispatch;
		_sched_enum = sched_enum_multiq;
		strlcpy(sched_string, kSchedMultiQString, sizeof(sched_string));
#elif defined(CONFIG_SCHED_TRADITIONAL)
		sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
		_sched_enum = sched_enum_traditional_with_pset_runqueue;
		strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
#elif defined(CONFIG_SCHED_PROTO)
		sched_current_dispatch = &sched_proto_dispatch;
		_sched_enum = sched_enum_proto;
		strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
#elif defined(CONFIG_SCHED_GRRR)
		sched_current_dispatch = &sched_grrr_dispatch;
		_sched_enum = sched_enum_grrr;
		strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
#else
#error No default scheduler implementation
#endif
		kprintf("Scheduler: Default of %s\n", sched_string);
	}
	SCHED(fairshare_init)();
	sched_realtime_init();
	sched_timer_deadline_tracking_init();

	SCHED(pset_init)(&pset0);
	SCHED(processor_init)(master_processor);
}
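
/*
 * Usage note (editorial, not part of the original source): the scheduler can
 * be selected at boot either with a "sched=<name>" boot-arg or a "kern.sched"
 * device tree property, where <name> is one of the kSched*String constants
 * compiled in above (e.g. a name such as "traditional" or "multiq" on kernels
 * built with those configs; the exact strings are defined alongside the
 * dispatch tables in the scheduler headers). The "sched_pri_decay_limit"
 * boot-arg / "kern.sched_pri_decay_limit" property override the priority
 * decay band limit in the same way.
 */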
void
sched_timebase_init(void)
{
	uint64_t	abstime;

	clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC, &abstime);
	sched_one_second_interval = abstime;

	SCHED(timebase_init)();
	sched_realtime_timebase_init();
}
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

static void
sched_traditional_init(void)
{
	/*
	 * Calculate the timeslicing quantum
	 * in us.
	 */
	if (default_preemption_rate < 1)
		default_preemption_rate = DEFAULT_PREEMPTION_RATE;
	std_quantum_us = (1000 * 1000) / default_preemption_rate;

	printf("standard timeslicing quantum is %d us\n", std_quantum_us);

	if (default_bg_preemption_rate < 1)
		default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
	bg_quantum_us = (1000 * 1000) / default_bg_preemption_rate;

	printf("standard background quantum is %d us\n", bg_quantum_us);
}
static void
sched_traditional_timebase_init(void)
{
	uint64_t	abstime;
	uint32_t	shift;

	/* standard timeslicing quantum */
	clock_interval_to_absolutetime_interval(
		std_quantum_us, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	std_quantum = (uint32_t)abstime;

	/* smallest remaining quantum (250 us) */
	clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	min_std_quantum = (uint32_t)abstime;

	/* quantum for background tasks */
	clock_interval_to_absolutetime_interval(
		bg_quantum_us, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	bg_quantum = (uint32_t)abstime;

	/* scheduler tick interval */
	clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
						NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	sched_tick_interval = (uint32_t)abstime;

	/*
	 * Compute conversion factor from usage to
	 * timesharing priorities with 5/8 ** n aging.
	 */
	abstime = (abstime * 5) / 3;
	for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
		abstime >>= 1;
	sched_fixed_shift = shift;
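
	/*
	 * Worked example (editorial, not part of the original source): assuming
	 * SCHED_TICK_SHIFT is 3 and one absolute-time unit equals one
	 * nanosecond, the tick interval above is 125000 us, i.e. about
	 * 125,000,000 abs units. After the 5/3 scaling abstime is roughly
	 * 208,333,333, and the loop needs 23 right-shifts before the value
	 * drops to or below BASEPRI_DEFAULT (31 on typical configurations),
	 * so sched_fixed_shift ends up as 23. The exact value depends on the
	 * platform timebase.
	 */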
	max_unsafe_computation = ((uint64_t)max_unsafe_quanta) * std_quantum;
	sched_safe_duration = 2 * ((uint64_t)max_unsafe_quanta) * std_quantum;

	max_poll_computation = ((uint64_t)max_poll_quanta) * std_quantum;
	thread_depress_time = 1 * std_quantum;
	default_timeshare_computation = std_quantum / 2;
	default_timeshare_constraint = std_quantum;

#if defined(CONFIG_TELEMETRY)
	/* interval for high frequency telemetry */
	clock_interval_to_absolutetime_interval(10, NSEC_PER_MSEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	sched_telemetry_interval = (uint32_t)abstime;
#endif
}

#endif /* CONFIG_SCHED_TIMESHARE_CORE */
#if defined(CONFIG_SCHED_TRADITIONAL)

static void
sched_traditional_processor_init(processor_t processor)
{
	if (!sched_traditional_use_pset_runqueue) {
		run_queue_init(&processor->runq);
	}
	processor->runq_bound_count = 0;
}

static void
sched_traditional_pset_init(processor_set_t pset)
{
	if (sched_traditional_use_pset_runqueue) {
		run_queue_init(&pset->pset_runq);
	}
	pset->pset_runq_bound_count = 0;
}

static void
sched_traditional_with_pset_runqueue_init(void)
{
	sched_traditional_init();
	sched_traditional_use_pset_runqueue = TRUE;
}

#endif /* CONFIG_SCHED_TRADITIONAL */
#if defined(CONFIG_SCHED_FAIRSHARE_CORE)

void
sched_traditional_fairshare_init(void)
{
	simple_lock_init(&fs_lock, 0);

	queue_init(&fs_runq.queue);
}

#endif /* CONFIG_SCHED_FAIRSHARE_CORE */

static void
sched_realtime_init(void)
{
	simple_lock_init(&rt_lock, 0);

	queue_init(&rt_runq.queue);
}
static void
sched_realtime_timebase_init(void)
{
	uint64_t abstime;

	/* smallest rt computation (50 us) */
	clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	min_rt_quantum = (uint32_t)abstime;

	/* maximum rt computation (50 ms) */
	clock_interval_to_absolutetime_interval(
		50, 1000*NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	max_rt_quantum = (uint32_t)abstime;
}
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

/*
 * Set up values for timeshare
 * loading factors.
 */
static void
load_shift_init(void)
{
	int8_t		k, *p = sched_load_shifts;
	uint32_t	i, j;

	uint32_t	sched_decay_penalty = 1;

	if (PE_parse_boot_argn("sched_decay_penalty", &sched_decay_penalty, sizeof (sched_decay_penalty))) {
		kprintf("Overriding scheduler decay penalty %u\n", sched_decay_penalty);
	}

	if (PE_parse_boot_argn("sched_decay_usage_age_factor", &sched_decay_usage_age_factor, sizeof (sched_decay_usage_age_factor))) {
		kprintf("Overriding scheduler decay usage age factor %u\n", sched_decay_usage_age_factor);
	}

	if (PE_parse_boot_argn("sched_use_combined_fgbg_decay", &sched_use_combined_fgbg_decay, sizeof (sched_use_combined_fgbg_decay))) {
		kprintf("Overriding schedule fg/bg decay calculation: %u\n", sched_use_combined_fgbg_decay);
	}

	if (sched_decay_penalty == 0) {
		/*
		 * There is no penalty for timeshare threads for using too much
		 * CPU, so set all load shifts to INT8_MIN. Even under high load,
		 * sched_pri_shift will be >INT8_MAX, and there will be no
		 * penalty applied to threads (nor will sched_usage be updated
		 * per thread).
		 */
		for (i = 0; i < NRQS; i++) {
			sched_load_shifts[i] = INT8_MIN;
		}

		return;
	}

	*p++ = INT8_MIN; *p++ = 0;

	/*
	 * For a given system load "i", the per-thread priority
	 * penalty per quantum of CPU usage is ~2^k priority
	 * levels. "sched_decay_penalty" can cause more
	 * array entries to be filled with smaller "k" values.
	 */
	for (i = 2, j = 1 << sched_decay_penalty, k = 1; i < NRQS; ++k) {
		for (j <<= 1; (i < j) && (i < NRQS); ++i)
			*p++ = k;
	}
}
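
/*
 * Worked example (editorial, not part of the original source): with the
 * default sched_decay_penalty of 1 the loop above produces
 * p[0] = INT8_MIN, p[1] = 0, p[2..3] = 1, p[4..7] = 2, p[8..15] = 3, and so
 * on, i.e. roughly log2 of the load. A larger sched_decay_penalty widens the
 * early buckets, so a given load index maps to a smaller "k", matching the
 * comment above about more entries being filled with smaller values.
 */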
static void
preempt_pri_init(void)
{
	int		i, *p = sched_preempt_pri;

	for (i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i)
		setbit(i, p);

	for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
		setbit(i, p);
}
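
/*
 * Editorial note (not part of the original source): with the usual Mach
 * priority layout (BASEPRI_FOREGROUND 47, MINPRI_KERNEL 80, BASEPRI_PREEMPT
 * 92, MAXPRI 127 -- the exact values depend on configuration), the two loops
 * above mark priority bands 47-79 and 92-127 in the sched_preempt_pri bitmap,
 * which the priority_is_urgent() hook referenced in the dispatch tables above
 * can consult when deciding whether an enqueue warrants an immediate
 * preemption check.
 */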
#endif /* CONFIG_SCHED_TIMESHARE_CORE */

/*
 *	Thread wait timer expiration.
 */
void
thread_timer_expire(
	void			*p0,
	__unused void	*p1)
{
	thread_t		thread = p0;
	spl_t			s;

	s = splsched();
	thread_lock(thread);
	if (--thread->wait_timer_active == 0) {
		if (thread->wait_timer_is_set) {
			thread->wait_timer_is_set = FALSE;
			clear_wait_internal(thread, THREAD_TIMED_OUT);
		}
	}
	thread_unlock(thread);
	splx(s);
}
/*
 *	thread_unblock:
 *
 *	Unblock thread on wake up.
 *
 *	Returns TRUE if the thread is still running.
 *
 *	Thread must be locked.
 */
boolean_t
thread_unblock(
	thread_t		thread,
	wait_result_t	wresult)
{
	boolean_t		result = FALSE;
	thread_t		cthread = current_thread();
	uint32_t		new_run_count;

	/*
	 *	Set wait_result.
	 */
	thread->wait_result = wresult;

	/*
	 *	Cancel pending wait timer.
	 */
	if (thread->wait_timer_is_set) {
		if (timer_call_cancel(&thread->wait_timer))
			thread->wait_timer_active--;
		thread->wait_timer_is_set = FALSE;
	}

	/*
	 *	Update scheduling state: not waiting,
	 *	set running.
	 */
	thread->state &= ~(TH_WAIT|TH_UNINT);

	if (!(thread->state & TH_RUN)) {
		thread->state |= TH_RUN;

		(*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);

		/*
		 *	Update run counts.
		 */
		new_run_count = sched_run_incr(thread);
		if (thread->sched_mode == TH_MODE_TIMESHARE) {
			sched_share_incr(thread);

			if (thread->sched_flags & TH_SFLAG_THROTTLED)
				sched_background_incr(thread);
		}
	}
	else {
		/*
		 *	Signal if idling on another processor.
		 */
#if CONFIG_SCHED_IDLE_IN_PLACE
		if (thread->state & TH_IDLE) {
			processor_t		processor = thread->last_processor;

			if (processor != current_processor())
				machine_signal_idle(processor);
		}
#else
		assert((thread->state & TH_IDLE) == 0);
#endif

		new_run_count = sched_run_count; /* updated in thread_select_idle() */

		result = TRUE;
	}

	/*
	 * Calculate deadline for real-time threads.
	 */
	if (thread->sched_mode == TH_MODE_REALTIME) {
		uint64_t ctime;

		ctime = mach_absolute_time();
		thread->realtime.deadline = thread->realtime.constraint + ctime;
	}
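
	/*
	 * Example (editorial, not part of the original source): a realtime
	 * thread whose admitted constraint corresponds to 5 ms therefore wakes
	 * up with realtime.deadline = now + 5 ms in absolute-time units; the
	 * realtime run queue logic below orders and preempts realtime work by
	 * comparing these deadlines, with the earlier deadline winning.
	 */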
	/*
	 *	Clear old quantum, fail-safe computation, etc.
	 */
	thread->quantum_remaining = 0;
	thread->computation_metered = 0;
	thread->reason = AST_NONE;

	/* Obtain power-relevant interrupt and "platform-idle exit" statistics.
	 * We also account for "double hop" thread signaling via
	 * the thread callout infrastructure.
	 * DRK: consider removing the callout wakeup counters in the future
	 * they're present for verification at the moment.
	 */
	boolean_t aticontext, pidle;
	ml_get_power_state(&aticontext, &pidle);

	if (__improbable(aticontext && !(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT))) {
		ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1);
		DTRACE_SCHED2(iwakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);

		uint64_t ttd = PROCESSOR_DATA(current_processor(), timer_call_ttd);

		if (ttd <= timer_deadline_tracking_bin_1)
			thread->thread_timer_wakeups_bin_1++;
		else
			if (ttd <= timer_deadline_tracking_bin_2)
				thread->thread_timer_wakeups_bin_2++;

		if (pidle) {
			ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1);
		}

	} else if (thread_get_tag_internal(cthread) & THREAD_TAG_CALLOUT) {
		if (cthread->callout_woken_from_icontext) {
			ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1);
			thread->thread_callout_interrupt_wakeups++;
			if (cthread->callout_woken_from_platform_idle) {
				ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1);
				thread->thread_callout_platform_idle_wakeups++;
			}

			cthread->callout_woke_thread = TRUE;
		}
	}

	if (thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT) {
		thread->callout_woken_from_icontext = aticontext;
		thread->callout_woken_from_platform_idle = pidle;
		thread->callout_woke_thread = FALSE;
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
		(uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, new_run_count, 0);

	DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);

	return (result);
}
/*
 *	Routine:	thread_go
 *	Purpose:
 *		Unblock and dispatch thread.
 *	Conditions:
 *		thread lock held, IPC locks may be held.
 *		thread must have been pulled from wait queue under same lock hold.
 *	Returns:
 *		KERN_SUCCESS		- Thread was set running
 *		KERN_NOT_WAITING	- Thread was not waiting
 */
kern_return_t
thread_go(
	thread_t		thread,
	wait_result_t	wresult)
{
	assert(thread->at_safe_point == FALSE);
	assert(thread->wait_event == NO_EVENT64);
	assert(thread->wait_queue == WAIT_QUEUE_NULL);

	if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
		if (!thread_unblock(thread, wresult))
			thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);

		return (KERN_SUCCESS);
	}

	return (KERN_NOT_WAITING);
}
/*
 *	Routine:	thread_mark_wait_locked
 *	Purpose:
 *		Mark a thread as waiting.  If, given the circumstances,
 *		it doesn't want to wait (i.e. already aborted), then
 *		indicate that in the return value.
 *	Conditions:
 *		at splsched() and thread is locked.
 */
__private_extern__
wait_result_t
thread_mark_wait_locked(
	thread_t			thread,
	wait_interrupt_t	interruptible)
{
	boolean_t		at_safe_point;

	assert(thread == current_thread());

	/*
	 *	The thread may have certain types of interrupts/aborts masked
	 *	off.  Even if the wait location says these types of interrupts
	 *	are OK, we have to honor mask settings (outer-scoped code may
	 *	not be able to handle aborts at the moment).
	 */
	if (interruptible > (thread->options & TH_OPT_INTMASK))
		interruptible = thread->options & TH_OPT_INTMASK;

	at_safe_point = (interruptible == THREAD_ABORTSAFE);

	if (	interruptible == THREAD_UNINT			||
		!(thread->sched_flags & TH_SFLAG_ABORT)	||
		(!at_safe_point &&
		 (thread->sched_flags & TH_SFLAG_ABORTSAFELY))) {

		if ( !(thread->state & TH_TERMINATE))
			DTRACE_SCHED(sleep);

		thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
		thread->at_safe_point = at_safe_point;
		return (thread->wait_result = THREAD_WAITING);
	}
	else
	if (thread->sched_flags & TH_SFLAG_ABORTSAFELY)
		thread->sched_flags &= ~TH_SFLAG_ABORTED_MASK;

	return (thread->wait_result = THREAD_INTERRUPTED);
}
/*
 *	Routine:	thread_interrupt_level
 *	Purpose:
 *		Set the maximum interruptible state for the
 *		current thread.  The effective value of any
 *		interruptible flag passed into assert_wait
 *		will never exceed this.
 *	Preconditions:
 *		Useful for code that must not be interrupted,
 *		but which calls code that doesn't know that.
 *	Returns:
 *		The old interrupt level for the thread.
 */
__private_extern__
wait_interrupt_t
thread_interrupt_level(
	wait_interrupt_t new_level)
{
	thread_t thread = current_thread();
	wait_interrupt_t result = thread->options & TH_OPT_INTMASK;

	thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);

	return result;
}
/*
 * Check to see if an assert wait is possible, without actually doing one.
 * This is used by debug code in locks and elsewhere to verify that it is
 * always OK to block when trying to take a blocking lock (since waiting
 * for the actual assert_wait to catch the case may make it hard to detect
 * this case).
 */
boolean_t
assert_wait_possible(void)
{
	thread_t thread;

#if	DEBUG
	if(debug_mode) return TRUE;		/* Always succeed in debug mode */
#endif

	thread = current_thread();

	return (thread == NULL || wait_queue_assert_possible(thread));
}
/*
 *	assert_wait:
 *
 *	Assert that the current thread is about to go to
 *	sleep until the specified event occurs.
 */
wait_result_t
assert_wait(
	event_t				event,
	wait_interrupt_t	interruptible)
{
	register wait_queue_t	wq;
	register int			index;

	if(event == NO_EVENT)
		panic("assert_wait() called with NO_EVENT");

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
		VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0);

	index = wait_hash(event);
	wq = &wait_queues[index];
	return wait_queue_assert_wait(wq, event, interruptible, 0);
}
wait_result_t
assert_wait_timeout(
	event_t				event,
	wait_interrupt_t	interruptible,
	uint32_t			interval,
	uint32_t			scale_factor)
{
	thread_t			thread = current_thread();
	wait_result_t		wresult;
	wait_queue_t		wqueue;
	uint64_t			deadline;
	spl_t				s;

	if(event == NO_EVENT)
		panic("assert_wait_timeout() called with NO_EVENT");

	wqueue = &wait_queues[wait_hash(event)];

	s = splsched();
	wait_queue_lock(wqueue);
	thread_lock(thread);

	clock_interval_to_deadline(interval, scale_factor, &deadline);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
		VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);

	wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
						  interruptible,
						  TIMEOUT_URGENCY_SYS_NORMAL,
						  deadline, 0,
						  thread);

	thread_unlock(thread);
	wait_queue_unlock(wqueue);
	splx(s);

	return (wresult);
}
wait_result_t
assert_wait_timeout_with_leeway(
	event_t				event,
	wait_interrupt_t	interruptible,
	wait_timeout_urgency_t	urgency,
	uint32_t			interval,
	uint32_t			leeway,
	uint32_t			scale_factor)
{
	thread_t			thread = current_thread();
	wait_result_t		wresult;
	wait_queue_t		wqueue;
	uint64_t			deadline;
	uint64_t			abstime;
	uint64_t			slop;
	uint64_t			now;
	spl_t				s;

	now = mach_absolute_time();
	clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
	deadline = now + abstime;

	clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop);

	if(event == NO_EVENT)
		panic("assert_wait_timeout_with_leeway() called with NO_EVENT");

	wqueue = &wait_queues[wait_hash(event)];

	s = splsched();
	wait_queue_lock(wqueue);
	thread_lock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
		VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);

	wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
						  interruptible,
						  urgency, deadline, slop,
						  thread);

	thread_unlock(thread);
	wait_queue_unlock(wqueue);
	splx(s);

	return (wresult);
}
wait_result_t
assert_wait_deadline(
	event_t				event,
	wait_interrupt_t	interruptible,
	uint64_t			deadline)
{
	thread_t			thread = current_thread();
	wait_result_t		wresult;
	wait_queue_t		wqueue;
	spl_t				s;

	assert(event != NO_EVENT);
	wqueue = &wait_queues[wait_hash(event)];

	s = splsched();
	wait_queue_lock(wqueue);
	thread_lock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
		VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);

	wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
						  interruptible,
						  TIMEOUT_URGENCY_SYS_NORMAL, deadline, 0,
						  thread);

	thread_unlock(thread);
	wait_queue_unlock(wqueue);
	splx(s);

	return (wresult);
}
wait_result_t
assert_wait_deadline_with_leeway(
	event_t				event,
	wait_interrupt_t	interruptible,
	wait_timeout_urgency_t	urgency,
	uint64_t			deadline,
	uint64_t			leeway)
{
	thread_t			thread = current_thread();
	wait_result_t		wresult;
	wait_queue_t		wqueue;
	spl_t				s;

	if(event == NO_EVENT)
		panic("assert_wait_deadline_with_leeway() called with NO_EVENT");

	wqueue = &wait_queues[wait_hash(event)];

	s = splsched();
	wait_queue_lock(wqueue);
	thread_lock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
		VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);

	wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
						  interruptible,
						  urgency, deadline, leeway,
						  thread);

	thread_unlock(thread);
	wait_queue_unlock(wqueue);
	splx(s);

	return (wresult);
}
/*
 * thread_isoncpu:
 *
 * Return TRUE if a thread is running on a processor such that an AST
 * is needed to pull it out of userspace execution, or if executing in
 * the kernel, bring to a context switch boundary that would cause
 * thread state to be serialized in the thread PCB.
 *
 * Thread locked, returns the same way. While locked, fields
 * like "state" cannot change. "runq" can change only from set to unset.
 */
static inline boolean_t
thread_isoncpu(thread_t thread)
{
	/* Not running or runnable */
	if (!(thread->state & TH_RUN))
		return (FALSE);

	/* Waiting on a runqueue, not currently running */
	/* TODO: This is invalid - it can get dequeued without thread lock, but not context switched. */
	if (thread->runq != PROCESSOR_NULL)
		return (FALSE);

	/*
	 * Thread must be running on a processor, or
	 * about to run, or just did run. In all these
	 * cases, an AST to the processor is needed
	 * to guarantee that the thread is kicked out
	 * of userspace and the processor has
	 * context switched (and saved register state).
	 */
	return (TRUE);
}
/*
 * thread_stop:
 *
 * Force a preemption point for a thread and wait
 * for it to stop running on a CPU. If a stronger
 * guarantee is requested, wait until no longer
 * runnable. Arbitrates access among
 * multiple stop requests. (released by unstop)
 *
 * The thread must enter a wait state and stop via a
 * separate means.
 *
 * Returns FALSE if interrupted.
 */
boolean_t
thread_stop(
	thread_t		thread,
	boolean_t		until_not_runnable)
{
	wait_result_t	wresult;
	spl_t			s = splsched();
	boolean_t		oncpu;

	wake_lock(thread);
	thread_lock(thread);

	while (thread->state & TH_SUSP) {
		thread->wake_active = TRUE;
		thread_unlock(thread);

		wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
		wake_unlock(thread);
		splx(s);

		if (wresult == THREAD_WAITING)
			wresult = thread_block(THREAD_CONTINUE_NULL);

		if (wresult != THREAD_AWAKENED)
			return (FALSE);

		s = splsched();
		wake_lock(thread);
		thread_lock(thread);
	}

	thread->state |= TH_SUSP;

	while ((oncpu = thread_isoncpu(thread)) ||
	       (until_not_runnable && (thread->state & TH_RUN))) {
		processor_t		processor;

		if (oncpu) {
			assert(thread->state & TH_RUN);
			processor = thread->chosen_processor;
			cause_ast_check(processor);
		}

		thread->wake_active = TRUE;
		thread_unlock(thread);

		wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
		wake_unlock(thread);
		splx(s);

		if (wresult == THREAD_WAITING)
			wresult = thread_block(THREAD_CONTINUE_NULL);

		if (wresult != THREAD_AWAKENED) {
			thread_unstop(thread);
			return (FALSE);
		}

		s = splsched();
		wake_lock(thread);
		thread_lock(thread);
	}

	thread_unlock(thread);
	wake_unlock(thread);
	splx(s);

	/*
	 * We return with the thread unlocked. To prevent it from
	 * transitioning to a runnable state (or from TH_RUN to
	 * being on the CPU), the caller must ensure the thread
	 * is stopped via an external means (such as an AST)
	 */

	return (TRUE);
}
/*
 * thread_unstop:
 *
 * Release a previous stop request and set
 * the thread running if appropriate.
 *
 * Use only after a successful stop operation.
 */
void
thread_unstop(
	thread_t	thread)
{
	spl_t		s = splsched();

	wake_lock(thread);
	thread_lock(thread);

	if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
		thread->state &= ~TH_SUSP;
		thread_unblock(thread, THREAD_AWAKENED);

		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
	}
	else
	if (thread->state & TH_SUSP) {
		thread->state &= ~TH_SUSP;

		if (thread->wake_active) {
			thread->wake_active = FALSE;
			thread_unlock(thread);

			thread_wakeup(&thread->wake_active);
			wake_unlock(thread);
			splx(s);

			return;
		}
	}

	thread_unlock(thread);
	wake_unlock(thread);
	splx(s);
}
/*
 * thread_wait:
 *
 * Wait for a thread to stop running. (non-interruptible)
 *
 */
void
thread_wait(
	thread_t	thread,
	boolean_t	until_not_runnable)
{
	wait_result_t	wresult;
	boolean_t	oncpu;
	processor_t	processor;
	spl_t		s = splsched();

	wake_lock(thread);
	thread_lock(thread);

	/*
	 * Wait until not running on a CPU.  If stronger requirement
	 * desired, wait until not runnable.  Assumption: if thread is
	 * on CPU, then TH_RUN is set, so we're not waiting in any case
	 * where the original, pure "TH_RUN" check would have let us
	 * finish.
	 */
	while ((oncpu = thread_isoncpu(thread)) ||
	       (until_not_runnable && (thread->state & TH_RUN))) {

		if (oncpu) {
			assert(thread->state & TH_RUN);
			processor = thread->chosen_processor;
			cause_ast_check(processor);
		}

		thread->wake_active = TRUE;
		thread_unlock(thread);

		wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
		wake_unlock(thread);
		splx(s);

		if (wresult == THREAD_WAITING)
			thread_block(THREAD_CONTINUE_NULL);

		s = splsched();
		wake_lock(thread);
		thread_lock(thread);
	}

	thread_unlock(thread);
	wake_unlock(thread);
	splx(s);
}
/*
 *	Routine: clear_wait_internal
 *
 *		Clear the wait condition for the specified thread.
 *		Start the thread executing if that is appropriate.
 *	Arguments:
 *		thread		thread to awaken
 *		result		Wakeup result the thread should see
 *	Conditions:
 *		At splsched
 *		the thread is locked.
 *	Returns:
 *		KERN_SUCCESS		thread was rousted out a wait
 *		KERN_FAILURE		thread was waiting but could not be rousted
 *		KERN_NOT_WAITING	thread was not waiting
 */
__private_extern__ kern_return_t
clear_wait_internal(
	thread_t		thread,
	wait_result_t	wresult)
{
	wait_queue_t	wq = thread->wait_queue;
	uint32_t	i = LockTimeOut;

	do {
		if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
			return (KERN_FAILURE);

		if (wq != WAIT_QUEUE_NULL) {
			if (wait_queue_lock_try(wq)) {
				wait_queue_pull_thread_locked(wq, thread, TRUE);
				/* wait queue unlocked, thread still locked */
			}
			else {
				thread_unlock(thread);

				thread_lock(thread);
				if (wq != thread->wait_queue)
					return (KERN_NOT_WAITING);

				continue;
			}
		}

		return (thread_go(thread, wresult));
	} while ((--i > 0) || machine_timeout_suspended());

	panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
	      thread, wq, cpu_number());

	return (KERN_FAILURE);
}
/*
 *	clear_wait:
 *
 *	Clear the wait condition for the specified thread.  Start the thread
 *	executing if that is appropriate.
 *
 *	parameters:
 *	  thread		thread to awaken
 *	  result		Wakeup result the thread should see
 */
kern_return_t
clear_wait(
	thread_t		thread,
	wait_result_t	result)
{
	kern_return_t	ret;
	spl_t			s;

	s = splsched();
	thread_lock(thread);
	ret = clear_wait_internal(thread, result);
	thread_unlock(thread);
	splx(s);
	return ret;
}
/*
 *	thread_wakeup_prim:
 *
 *	Common routine for thread_wakeup, thread_wakeup_with_result,
 *	and thread_wakeup_one.
 *
 */
kern_return_t
thread_wakeup_prim(
	event_t			event,
	boolean_t		one_thread,
	wait_result_t	result)
{
	return (thread_wakeup_prim_internal(event, one_thread, result, -1));
}


kern_return_t
thread_wakeup_prim_internal(
	event_t			event,
	boolean_t		one_thread,
	wait_result_t	result,
	int				priority)
{
	register wait_queue_t	wq;
	register int			index;

	if(event == NO_EVENT)
		panic("thread_wakeup_prim() called with NO_EVENT");

	index = wait_hash(event);
	wq = &wait_queues[index];
	if (one_thread)
		return (wait_queue_wakeup_one(wq, event, result, priority));
	else
		return (wait_queue_wakeup_all(wq, event, result));
}
/*
 *	thread_bind:
 *
 *	Force the current thread to execute on the specified processor.
 *	Takes effect after the next thread_block().
 *
 *	Returns the previous binding.  PROCESSOR_NULL means
 *	not bound.
 *
 *	XXX - DO NOT export this to users - XXX
 */
processor_t
thread_bind(
	processor_t		processor)
{
	thread_t		self = current_thread();
	processor_t		prev;
	spl_t			s;

	s = splsched();
	thread_lock(self);

	/* <rdar://problem/15102234> */
	assert(self->sched_pri < BASEPRI_RTQUEUES);

	prev = self->bound_processor;
	self->bound_processor = processor;

	thread_unlock(self);
	splx(s);

	return (prev);
}
/* Invoked prior to idle entry to determine if, on SMT capable processors, an SMT
 * rebalancing opportunity exists when a core is (instantaneously) idle, but
 * other SMT-capable cores may be over-committed. TODO: some possible negatives:
 * IPI thrash if this core does not remain idle following the load balancing ASTs.
 * Idle "thrash", when IPI issue is followed by idle entry/core power down
 * followed by a wakeup shortly thereafter.
 */

/* Invoked with pset locked, returns with pset unlocked */
#if (DEVELOPMENT || DEBUG)
int sched_smt_balance = 1;
#endif

static void
sched_SMT_balance(processor_t cprocessor, processor_set_t cpset) {
	processor_t ast_processor = NULL;

#if (DEVELOPMENT || DEBUG)
	if (__improbable(sched_smt_balance == 0))
		goto smt_balance_exit;
#endif

	assert(cprocessor == current_processor());
	if (cprocessor->is_SMT == FALSE)
		goto smt_balance_exit;

	processor_t sib_processor = cprocessor->processor_secondary ? cprocessor->processor_secondary : cprocessor->processor_primary;

	/* Determine if both this processor and its sibling are idle,
	 * indicating an SMT rebalancing opportunity.
	 */
	if (sib_processor->state != PROCESSOR_IDLE)
		goto smt_balance_exit;

	processor_t sprocessor;

	sprocessor = (processor_t)queue_first(&cpset->active_queue);

	while (!queue_end(&cpset->active_queue, (queue_entry_t)sprocessor)) {
		if ((sprocessor->state == PROCESSOR_RUNNING) &&
		    (sprocessor->processor_primary != sprocessor) &&
		    (sprocessor->processor_primary->state == PROCESSOR_RUNNING) &&
		    (sprocessor->current_pri < BASEPRI_RTQUEUES) &&
		    ((cpset->pending_AST_cpu_mask & (1U << sprocessor->cpu_id)) == 0)) {
			assert(sprocessor != cprocessor);
			ast_processor = sprocessor;
			break;
		}
		sprocessor = (processor_t)queue_next((queue_entry_t)sprocessor);
	}

smt_balance_exit:
	pset_unlock(cpset);

	if (ast_processor) {
		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_SMT_BALANCE), ast_processor->cpu_id, ast_processor->state, ast_processor->processor_primary->state, 0, 0);
		cause_ast_check(ast_processor);
	}
}
/*
 *	thread_select:
 *
 *	Select a new thread for the current processor to execute.
 *
 *	May select the current thread, which must be locked.
 */
static thread_t
thread_select(
	thread_t			thread,
	processor_t			processor,
	ast_t				reason)
{
	processor_set_t		pset = processor->processor_set;
	thread_t			new_thread = THREAD_NULL;

	assert(processor == current_processor());

	do {
		/*
		 *	Update the priority.
		 */
		if (SCHED(can_update_priority)(thread))
			SCHED(update_priority)(thread);

		processor->current_pri = thread->sched_pri;
		processor->current_thmode = thread->sched_mode;
		processor->current_sfi_class = thread->sfi_class;

		pset_lock(pset);

		assert(processor->state != PROCESSOR_OFF_LINE);

		if (processor->processor_primary != processor) {
			/*
			 * Should this secondary SMT processor attempt to find work? For pset runqueue systems,
			 * we should look for work only under the same conditions that choose_processor()
			 * would have assigned work, which is when all primary processors have been assigned work.
			 *
			 * An exception is that bound threads are dispatched to a processor without going through
			 * choose_processor(), so in those cases we should continue trying to dequeue work.
			 */
			if (!SCHED(processor_bound_count)(processor) && !queue_empty(&pset->idle_queue) && !rt_runq.count) {
				goto idle;
			}
		}

		simple_lock(&rt_lock);

		/*
		 *	Test to see if the current thread should continue
		 *	to run on this processor.  Must be runnable, and not
		 *	bound to a different processor, nor be in the wrong
		 *	processor set.
		 */
		if (((thread->state & ~TH_SUSP) == TH_RUN) &&
		    (thread->sched_pri >= BASEPRI_RTQUEUES || processor->processor_primary == processor) &&
		    (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor) &&
		    (thread->affinity_set == AFFINITY_SET_NULL || thread->affinity_set->aset_pset == pset)) {
			if (thread->sched_pri >= BASEPRI_RTQUEUES && first_timeslice(processor)) {
				if (rt_runq.count > 0) {
					thread_t next_rt;

					next_rt = (thread_t)queue_first(&rt_runq.queue);
					if (next_rt->realtime.deadline < processor->deadline &&
					    (next_rt->bound_processor == PROCESSOR_NULL || next_rt->bound_processor == processor)) {
						thread = (thread_t)dequeue_head(&rt_runq.queue);
						thread->runq = PROCESSOR_NULL;
						SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
						rt_runq.count--;
					}
				}

				simple_unlock(&rt_lock);

				processor->deadline = thread->realtime.deadline;

				pset_unlock(pset);

				return (thread);
			}

			if ((thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) && (new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? MINPRI : thread->sched_pri, reason)) == THREAD_NULL) {

				simple_unlock(&rt_lock);

				/* This thread is still the highest priority runnable (non-idle) thread */

				processor->deadline = UINT64_MAX;

				pset_unlock(pset);

				return (thread);
			}
		}

		if (new_thread != THREAD_NULL ||
		    (SCHED(processor_queue_has_priority)(processor, rt_runq.count == 0 ? IDLEPRI : BASEPRI_RTQUEUES, TRUE) &&
		     (new_thread = SCHED(choose_thread)(processor, MINPRI, reason)) != THREAD_NULL)) {
			simple_unlock(&rt_lock);

			processor->deadline = UINT64_MAX;

			pset_unlock(pset);

			return (new_thread);
		}

		if (rt_runq.count > 0) {
			thread_t next_rt = (thread_t)queue_first(&rt_runq.queue);

			if (__probable((next_rt->bound_processor == NULL || (next_rt->bound_processor == processor)))) {
				thread = (thread_t)dequeue_head(&rt_runq.queue);

				thread->runq = PROCESSOR_NULL;
				SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
				rt_runq.count--;

				simple_unlock(&rt_lock);

				processor->deadline = thread->realtime.deadline;

				pset_unlock(pset);

				return (thread);
			}
		}

		simple_unlock(&rt_lock);

		/* No realtime threads and no normal threads on the per-processor
		 * runqueue. Finally check for global fairshare threads.
		 */
		if ((new_thread = SCHED(fairshare_dequeue)()) != THREAD_NULL) {

			processor->deadline = UINT64_MAX;
			pset_unlock(pset);

			return (new_thread);
		}

		processor->deadline = UINT64_MAX;

		/*
		 *	No runnable threads, attempt to steal
		 *	from other processors.
		 */
		new_thread = SCHED(steal_thread)(pset);
		if (new_thread != THREAD_NULL) {
			return (new_thread);
		}

		/*
		 *	If other threads have appeared, shortcut
		 *	around again.
		 */
		if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0 || SCHED(fairshare_runq_count)() > 0)
			continue;

		pset_lock(pset);

	idle:
		/*
		 *	Nothing is runnable, so set this processor idle if it
		 *	was running.
		 */
		if (processor->state == PROCESSOR_RUNNING) {
			remqueue((queue_entry_t)processor);
			processor->state = PROCESSOR_IDLE;

			if (processor->processor_primary == processor) {
				enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
			}
			else {
				enqueue_head(&pset->idle_secondary_queue, (queue_entry_t)processor);
			}
		}

		/* Invoked with pset locked, returns with pset unlocked */
		sched_SMT_balance(processor, pset);

#if CONFIG_SCHED_IDLE_IN_PLACE
		/*
		 *	Choose idle thread if fast idle is not possible.
		 */
		if (processor->processor_primary != processor)
			return (processor->idle_thread);

		if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active || thread->sched_pri >= BASEPRI_RTQUEUES)
			return (processor->idle_thread);

		/*
		 *	Perform idling activities directly without a
		 *	context switch.  Return dispatched thread,
		 *	else check again for a runnable thread.
		 */
		new_thread = thread_select_idle(thread, processor);

#else /* !CONFIG_SCHED_IDLE_IN_PLACE */

		/*
		 * Do a full context switch to idle so that the current
		 * thread can start running on another processor without
		 * waiting for the fast-idled processor to wake up.
		 */
		return (processor->idle_thread);

#endif /* !CONFIG_SCHED_IDLE_IN_PLACE */

	} while (new_thread == THREAD_NULL);

	return (new_thread);
}
#if CONFIG_SCHED_IDLE_IN_PLACE
/*
 *	thread_select_idle:
 *
 *	Idle the processor using the current thread context.
 *
 *	Called with thread locked, then dropped and relocked.
 */
static thread_t
thread_select_idle(
	thread_t		thread,
	processor_t		processor)
{
	thread_t		new_thread;
	uint64_t		arg1, arg2;
	int			urgency;

	if (thread->sched_mode == TH_MODE_TIMESHARE) {
		if (thread->sched_flags & TH_SFLAG_THROTTLED)
			sched_background_decr(thread);

		sched_share_decr(thread);
	}
	sched_run_decr(thread);

	thread->state |= TH_IDLE;
	processor->current_pri = IDLEPRI;
	processor->current_thmode = TH_MODE_NONE;
	processor->current_sfi_class = SFI_CLASS_KERNEL;

	/* Reload precise timing global policy to thread-local policy */
	thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);

	thread_unlock(thread);

	/*
	 *	Switch execution timing to processor idle thread.
	 */
	processor->last_dispatch = mach_absolute_time();

#ifdef CONFIG_MACH_APPROXIMATE_TIME
	commpage_update_mach_approximate_time(processor->last_dispatch);
#endif

	thread->last_run_time = processor->last_dispatch;
	thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
	PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;

	/*
	 *	Cancel the quantum timer while idling.
	 */
	timer_call_cancel(&processor->quantum_timer);
	processor->timeslice = 0;

	(*thread->sched_call)(SCHED_CALL_BLOCK, thread);

	thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL);

	/*
	 *	Enable interrupts and perform idling activities.  No
	 *	preemption due to TH_IDLE being set.
	 */
	spllo(); new_thread = processor_idle(thread, processor);

	/*
	 *	Return at splsched.
	 */
	(*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);

	thread_lock(thread);

	/*
	 *	If awakened, switch to thread timer and start a new quantum.
	 *	Otherwise skip; we will context switch to another thread or return here.
	 */
	if (!(thread->state & TH_WAIT)) {
		processor->last_dispatch = mach_absolute_time();
		thread_timer_event(processor->last_dispatch, &thread->system_timer);
		PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;

		thread_quantum_init(thread);
		processor->quantum_end = processor->last_dispatch + thread->quantum_remaining;
		timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
		processor->timeslice = 1;

		thread->computation_epoch = processor->last_dispatch;
	}

	thread->state &= ~TH_IDLE;

	/*
	 * If we idled in place, simulate a context switch back
	 * to the original priority of the thread so that the
	 * platform layer cannot distinguish this from a true
	 * switch to the idle thread.
	 */

	urgency = thread_get_urgency(thread, &arg1, &arg2);

	thread_tell_urgency(urgency, arg1, arg2, new_thread);

	sched_run_incr(thread);
	if (thread->sched_mode == TH_MODE_TIMESHARE) {
		sched_share_incr(thread);

		if (thread->sched_flags & TH_SFLAG_THROTTLED)
			sched_background_incr(thread);
	}

	return (new_thread);
}
#endif /* CONFIG_SCHED_IDLE_IN_PLACE */
#if defined(CONFIG_SCHED_TRADITIONAL)

static thread_t
sched_traditional_choose_thread(
	processor_t		processor,
	int			priority,
	__unused ast_t		reason)
{
	thread_t thread;

	thread = choose_thread_from_runq(processor, runq_for_processor(processor), priority);
	if (thread != THREAD_NULL) {
		runq_consider_decr_bound_count(processor, thread);
	}

	return (thread);
}

#endif /* defined(CONFIG_SCHED_TRADITIONAL) */
#if defined(CONFIG_SCHED_TRADITIONAL)

/*
 *	choose_thread_from_runq:
 *
 *	Locate a thread to execute from the processor run queue
 *	and return it.  Only choose a thread with greater or equal
 *	priority.
 *
 *	Associated pset must be locked.  Returns THREAD_NULL
 *	on failure.
 */
static thread_t
choose_thread_from_runq(
	processor_t		processor,
	run_queue_t		rq,
	int				priority)
{
	queue_t			queue = rq->queues + rq->highq;
	int				pri = rq->highq, count = rq->count;
	thread_t		thread;

	while (count > 0 && pri >= priority) {
		thread = (thread_t)queue_first(queue);
		while (!queue_end(queue, (queue_entry_t)thread)) {
			if (thread->bound_processor == PROCESSOR_NULL ||
			    thread->bound_processor == processor) {
				remqueue((queue_entry_t)thread);

				thread->runq = PROCESSOR_NULL;
				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
				rq->count--;
				if (SCHED(priority_is_urgent)(pri)) {
					rq->urgency--; assert(rq->urgency >= 0);
				}
				if (queue_empty(queue)) {
					clrbit(MAXPRI - pri, rq->bitmap);
					rq->highq = MAXPRI - ffsbit(rq->bitmap);
				}

				return (thread);
			}
			count--;

			thread = (thread_t)queue_next((queue_entry_t)thread);
		}

		queue--; pri--;
	}

	return (THREAD_NULL);
}

#endif /* defined(CONFIG_SCHED_TRADITIONAL) */
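
/*
 * Editorial note (not part of the original source): the run queue bookkeeping
 * in choose_thread_from_runq() relies on rq->bitmap having one bit per
 * priority level, stored so that higher priorities map to lower bit indices.
 * Clearing bit (MAXPRI - pri) when a level empties and recomputing
 * rq->highq = MAXPRI - ffsbit(rq->bitmap) keeps rq->highq pointing at the
 * highest occupied priority without scanning all NRQS levels on every dequeue.
 */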
/*
 *	thread_invoke
 *
 *	Perform a context switch and start executing the new thread.
 *
 *	Returns FALSE on failure, and the thread is re-dispatched.
 *
 *	Called at splsched.
 */

/*
 * thread_invoke
 *
 * "self" is what is currently running on the processor,
 * "thread" is the new thread to context switch to
 * (which may be the same thread in some cases)
 */
static boolean_t
thread_invoke(
	thread_t			self,
	thread_t			thread,
	ast_t				reason)
{
	thread_continue_t	continuation = self->continuation;
	void				*parameter = self->parameter;
	processor_t			processor;
	uint64_t			ctime = mach_absolute_time();

#ifdef CONFIG_MACH_APPROXIMATE_TIME
	commpage_update_mach_approximate_time(ctime);
#endif

	if (__improbable(get_preemption_level() != 0)) {
		int pl = get_preemption_level();
		panic("thread_invoke: preemption_level %d, possible cause: %s",
		      pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" :
			   "blocking while holding a spinlock, or within interrupt context"));
	}

	assert(self == current_thread());
	assert(self->runq == PROCESSOR_NULL);

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	sched_traditional_consider_maintenance(ctime);
#endif /* CONFIG_SCHED_TIMESHARE_CORE */

	/*
	 * Mark thread interruptible.
	 */
	thread_lock(thread);
	thread->state &= ~TH_UNINT;

	assert(thread_runnable(thread));
	assert(thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == current_processor());
	assert(thread->runq == PROCESSOR_NULL);

	/* Reload precise timing global policy to thread-local policy */
	thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);

	/* Update SFI class based on other factors */
	thread->sfi_class = sfi_thread_classify(thread);

	/*
	 * Allow time constraint threads to hang onto
	 * a stack.
	 */
	if ((self->sched_mode == TH_MODE_REALTIME) && !self->reserved_stack)
		self->reserved_stack = self->kernel_stack;

	if (continuation != NULL) {
		if (!thread->kernel_stack) {
			/*
			 *	If we are using a privileged stack,
			 *	check to see whether we can exchange it with
			 *	that of the other thread.
			 */
			if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack)
				goto need_stack;

			/*
			 *	Context switch by performing a stack handoff.
			 */
			continuation = thread->continuation;
			parameter = thread->parameter;

			processor = current_processor();
			processor->active_thread = thread;
			processor->current_pri = thread->sched_pri;
			processor->current_thmode = thread->sched_mode;
			processor->current_sfi_class = thread->sfi_class;
			if (thread->last_processor != processor && thread->last_processor != NULL) {
				if (thread->last_processor->processor_set != processor->processor_set)
					thread->ps_switch++;
				thread->p_switch++;
			}
			thread->last_processor = processor;
			thread->c_switch++;
			ast_context(thread);
			thread_unlock(thread);

			self->reason = reason;

			processor->last_dispatch = ctime;
			self->last_run_time = ctime;
			thread_timer_event(ctime, &thread->system_timer);
			PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;

			/*
			 * Since non-precise user/kernel time doesn't update the state timer
			 * during privilege transitions, synthesize an event now.
			 */
			if (!thread->precise_user_kernel_time) {
				timer_switch(PROCESSOR_DATA(processor, current_state),
					     ctime,
					     PROCESSOR_DATA(processor, current_state));
			}

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
				self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);

			if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) {
				KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,
!= processor
) && (thread
->chosen_processor
!= PROCESSOR_NULL
)) {
2293 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_MOVED
)|DBG_FUNC_NONE
,
2294 (uintptr_t)thread_tid(thread
), (uintptr_t)thread
->chosen_processor
->cpu_id
, 0, 0, 0);
2297 DTRACE_SCHED2(off__cpu
, struct thread
*, thread
, struct proc
*, thread
->task
->bsd_info
);
2299 SCHED_STATS_CSW(processor
, self
->reason
, self
->sched_pri
, thread
->sched_pri
);
2301 TLOG(1, "thread_invoke: calling stack_handoff\n");
2302 stack_handoff(self
, thread
);
2304 DTRACE_SCHED(on__cpu
);
2306 thread_dispatch(self
, thread
);
2308 thread
->continuation
= thread
->parameter
= NULL
;
2310 counter(c_thread_invoke_hits
++);
2314 assert(continuation
);
2315 call_continuation(continuation
, parameter
, thread
->wait_result
);
2318 else if (thread
== self
) {
2319 /* same thread but with continuation */
2321 counter(++c_thread_invoke_same
);
2322 thread_unlock(self
);
2324 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
2325 MACHDBG_CODE(DBG_MACH_SCHED
,MACH_SCHED
) | DBG_FUNC_NONE
,
2326 self
->reason
, (uintptr_t)thread_tid(thread
), self
->sched_pri
, thread
->sched_pri
, 0);
2328 self
->continuation
= self
->parameter
= NULL
;
2332 call_continuation(continuation
, parameter
, self
->wait_result
);
2338 * Check that the other thread has a stack
2340 if (!thread
->kernel_stack
) {
2342 if (!stack_alloc_try(thread
)) {
2343 counter(c_thread_invoke_misses
++);
2344 thread_unlock(thread
);
2345 thread_stack_enqueue(thread
);
2349 else if (thread
== self
) {
2351 counter(++c_thread_invoke_same
);
2352 thread_unlock(self
);
2354 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
2355 MACHDBG_CODE(DBG_MACH_SCHED
,MACH_SCHED
) | DBG_FUNC_NONE
,
2356 self
->reason
, (uintptr_t)thread_tid(thread
), self
->sched_pri
, thread
->sched_pri
, 0);
2363 * Context switch by full context save.
2365 processor
= current_processor();
2366 processor
->active_thread
= thread
;
2367 processor
->current_pri
= thread
->sched_pri
;
2368 processor
->current_thmode
= thread
->sched_mode
;
2369 processor
->current_sfi_class
= thread
->sfi_class
;
2370 if (thread
->last_processor
!= processor
&& thread
->last_processor
!= NULL
) {
2371 if (thread
->last_processor
->processor_set
!= processor
->processor_set
)
2372 thread
->ps_switch
++;
2375 thread
->last_processor
= processor
;
2377 ast_context(thread
);
2378 thread_unlock(thread
);
2380 counter(c_thread_invoke_csw
++);
2382 assert(self
->runq
== PROCESSOR_NULL
);
2383 self
->reason
= reason
;
2385 processor
->last_dispatch
= ctime
;
2386 self
->last_run_time
= ctime
;
2387 thread_timer_event(ctime
, &thread
->system_timer
);
2388 PROCESSOR_DATA(processor
, kernel_timer
) = &thread
->system_timer
;
2391 * Since non-precise user/kernel time doesn't update the state timer
2392 * during privilege transitions, synthesize an event now.
2394 if (!thread
->precise_user_kernel_time
) {
2395 timer_switch(PROCESSOR_DATA(processor
, current_state
),
2397 PROCESSOR_DATA(processor
, current_state
));
2400 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
2401 MACHDBG_CODE(DBG_MACH_SCHED
,MACH_SCHED
) | DBG_FUNC_NONE
,
2402 self
->reason
, (uintptr_t)thread_tid(thread
), self
->sched_pri
, thread
->sched_pri
, 0);
2404 if ((thread
->chosen_processor
!= processor
) && (thread
->chosen_processor
!= NULL
)) {
2405 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_MOVED
)|DBG_FUNC_NONE
,
2406 (uintptr_t)thread_tid(thread
), (uintptr_t)thread
->chosen_processor
->cpu_id
, 0, 0, 0);
2409 DTRACE_SCHED2(off__cpu
, struct thread
*, thread
, struct proc
*, thread
->task
->bsd_info
);
2411 SCHED_STATS_CSW(processor
, self
->reason
, self
->sched_pri
, thread
->sched_pri
);
2414 * This is where we actually switch register context,
2415 * and address space if required. We will next run
2416 * as a result of a subsequent context switch.
2418 assert(continuation
== self
->continuation
);
2419 thread
= machine_switch_context(self
, continuation
, thread
);
2420 assert(self
== current_thread());
2421 TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self
, continuation
, thread
);
2423 DTRACE_SCHED(on__cpu
);
2426 * We have been resumed and are set to run.
2428 thread_dispatch(thread
, self
);
2431 self
->continuation
= self
->parameter
= NULL
;
2435 call_continuation(continuation
, parameter
, self
->wait_result
);
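/*
 * Overview (informal, derived from the code above): thread_invoke() takes one
 * of three paths --
 *
 *	1. a continuation was supplied and the target has no kernel stack:
 *	   hand our stack to "thread" (stack_handoff) and run its continuation;
 *	2. the target is "self": no switch is needed, just run the continuation;
 *	3. otherwise: a full register/address-space switch via
 *	   machine_switch_context(), resuming here (or in the continuation)
 *	   when we are next dispatched.
 *
 * In every path thread_dispatch() is called to finish bookkeeping for the
 * thread being switched away from.
 */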
/*
 *	thread_dispatch:
 *
 *	Handle threads at context switch.  Re-dispatch other thread
 *	if still running, otherwise update run state and perform
 *	special actions.  Update quantum for other thread and begin
 *	the quantum for ourselves.
 *
 *	"self" is our new current thread that we have context switched
 *	to, "thread" is the old thread that we have switched away from.
 *
 *	Called at splsched.
 */
void
thread_dispatch(
	thread_t	thread,
	thread_t	self)
{
	processor_t	processor = self->last_processor;

	if (thread != THREAD_NULL) {
		/*
		 *	If blocked at a continuation, discard
		 *	the stack.
		 */
		if (thread->continuation != NULL && thread->kernel_stack != 0)
			stack_free(thread);

		if (!(thread->state & TH_IDLE)) {
			int64_t remainder = 0;

			if (processor->quantum_end > processor->last_dispatch)
				remainder = processor->quantum_end -
				    processor->last_dispatch;

			consumed = thread->quantum_remaining - remainder;

			if ((thread->reason & AST_LEDGER) == 0) {
				/*
				 * Bill CPU time to both the task and
				 * the individual thread.
				 */
				ledger_credit(thread->t_ledger,
				    task_ledgers.cpu_time, consumed);
				ledger_credit(thread->t_threadledger,
				    thread_ledgers.cpu_time, consumed);
				if (thread->t_bankledger) {
					ledger_credit(thread->t_bankledger,
					    bank_ledgers.cpu_time,
					    (consumed - thread->t_deduct_bank_ledger_time));
				}
				thread->t_deduct_bank_ledger_time = 0;
			}

			thread_lock(thread);

			/*
			 *	Compute remainder of current quantum.
			 */
			if (first_timeslice(processor) &&
			    processor->quantum_end > processor->last_dispatch)
				thread->quantum_remaining = (uint32_t)remainder;
			else
				thread->quantum_remaining = 0;

			if (thread->sched_mode == TH_MODE_REALTIME) {
				/*
				 *	Cancel the deadline if the thread has
				 *	consumed the entire quantum.
				 */
				if (thread->quantum_remaining == 0) {
					thread->realtime.deadline = UINT64_MAX;
				}
			}
#if defined(CONFIG_SCHED_TRADITIONAL)
			else {
				/*
				 *	For non-realtime threads treat a tiny
				 *	remaining quantum as an expired quantum
				 *	but include what's left next time.
				 */
				if (thread->quantum_remaining < min_std_quantum) {
					thread->reason |= AST_QUANTUM;
					thread->quantum_remaining += SCHED(initial_quantum_size)(thread);
				}
			}
#endif /* CONFIG_SCHED_TRADITIONAL */

			/*
			 *	If we are doing a direct handoff then
			 *	take the remainder of the quantum.
			 */
			if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
				self->quantum_remaining = thread->quantum_remaining;
				thread->reason |= AST_QUANTUM;
				thread->quantum_remaining = 0;
			}
#if defined(CONFIG_SCHED_MULTIQ)
			if (sched_groups_enabled && thread->sched_group == self->sched_group) {
				/* TODO: Remove tracepoint */
				KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				    MACHDBG_CODE(DBG_MACH_SCHED, MACH_QUANTUM_HANDOFF) | DBG_FUNC_NONE,
				    self->reason, (uintptr_t)thread_tid(thread),
				    self->quantum_remaining, thread->quantum_remaining, 0);

				self->quantum_remaining = thread->quantum_remaining;
				thread->quantum_remaining = 0;
				/* TODO: Should we set AST_QUANTUM here? */
			}
#endif /* defined(CONFIG_SCHED_MULTIQ) */

			thread->computation_metered += (processor->last_dispatch - thread->computation_epoch);

			if ((thread->rwlock_count != 0) && !(LcksOpts & disLkRWPrio)) {
				priority = thread->sched_pri;

				if (priority < thread->priority)
					priority = thread->priority;
				if (priority < BASEPRI_BACKGROUND)
					priority = BASEPRI_BACKGROUND;

				if ((thread->sched_pri < priority) || !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
					KERNEL_DEBUG_CONSTANT(
					    MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
					    (uintptr_t)thread_tid(thread), thread->sched_pri, thread->priority, priority, 0);

					thread->sched_flags |= TH_SFLAG_RW_PROMOTED;

					if (thread->sched_pri < priority)
						set_sched_pri(thread, priority);
				}
			}

			if (!(thread->state & TH_WAIT)) {
				if (thread->reason & AST_QUANTUM)
					thread_setrun(thread, SCHED_TAILQ);
				else if (thread->reason & AST_PREEMPT)
					thread_setrun(thread, SCHED_HEADQ);
				else
					thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);

				thread->reason = AST_NONE;

				KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				    MACHDBG_CODE(DBG_MACH_SCHED, MACH_DISPATCH) | DBG_FUNC_NONE,
				    (uintptr_t)thread_tid(thread), thread->reason, thread->state, sched_run_count, 0);

				if (thread->wake_active) {
					thread->wake_active = FALSE;
					thread_unlock(thread);

					thread_wakeup(&thread->wake_active);
				}
				else
					thread_unlock(thread);

				wake_unlock(thread);
			}
			else {
				boolean_t should_terminate = FALSE;
				uint32_t new_run_count;

				/* Only the first call to thread_dispatch
				 * after explicit termination should add
				 * the thread to the termination queue
				 */
				if ((thread->state & (TH_TERMINATE|TH_TERMINATE2)) == TH_TERMINATE) {
					should_terminate = TRUE;
					thread->state |= TH_TERMINATE2;
				}

				thread->state &= ~TH_RUN;
				thread->chosen_processor = PROCESSOR_NULL;

				if (thread->sched_mode == TH_MODE_TIMESHARE) {
					if (thread->sched_flags & TH_SFLAG_THROTTLED)
						sched_background_decr(thread);

					sched_share_decr(thread);
				}
				new_run_count = sched_run_decr(thread);

				if ((thread->state & (TH_WAIT | TH_TERMINATE)) == TH_WAIT) {
					if (thread->reason & AST_SFI) {
						thread->wait_sfi_begin_time = processor->last_dispatch;
					}
				}

				KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				    MACHDBG_CODE(DBG_MACH_SCHED, MACH_DISPATCH) | DBG_FUNC_NONE,
				    (uintptr_t)thread_tid(thread), thread->reason, thread->state, new_run_count, 0);

				(*thread->sched_call)(SCHED_CALL_BLOCK, thread);

				if (thread->wake_active) {
					thread->wake_active = FALSE;
					thread_unlock(thread);

					thread_wakeup(&thread->wake_active);
				}
				else
					thread_unlock(thread);

				wake_unlock(thread);

				if (should_terminate)
					thread_terminate_enqueue(thread);
			}
		}
	}

	if (!(self->state & TH_IDLE)) {
		uint64_t arg1, arg2;

		new_ast = sfi_thread_needs_ast(self, NULL);
		thread_unlock(self);

		if (new_ast != AST_NONE) {
			ast_on(new_ast);
		}

		urgency = thread_get_urgency(self, &arg1, &arg2);

		thread_tell_urgency(urgency, arg1, arg2, self);

		/*
		 *	Get a new quantum if none remaining.
		 */
		if (self->quantum_remaining == 0) {
			thread_quantum_init(self);
		}

		/*
		 *	Set up quantum timer and timeslice.
		 */
		processor->quantum_end = processor->last_dispatch + self->quantum_remaining;
		timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);

		processor->timeslice = 1;

		self->computation_epoch = processor->last_dispatch;
	}
	else {
		timer_call_cancel(&processor->quantum_timer);
		processor->timeslice = 0;

		thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL);
	}
}
/*
 *	thread_block_reason:
 *
 *	Forces a reschedule, blocking the caller if a wait
 *	has been asserted.
 *
 *	If a continuation is specified, then thread_invoke will
 *	attempt to discard the thread's kernel stack.  When the
 *	thread resumes, it will execute the continuation function
 *	on a new kernel stack.
 */
counter(mach_counter_t  c_thread_block_calls = 0;)

wait_result_t
thread_block_reason(
	thread_continue_t	continuation,
	void			*parameter,
	ast_t			reason)
{
	register thread_t	self = current_thread();
	register processor_t	processor;
	register thread_t	new_thread;

	counter(++c_thread_block_calls);

	processor = current_processor();

	/* If we're explicitly yielding, force a subsequent quantum */
	if (reason & AST_YIELD)
		processor->timeslice = 0;

	/* We're handling all scheduling AST's */
	ast_off(AST_SCHEDULING);

	self->continuation = continuation;
	self->parameter = parameter;

	if (self->state & ~(TH_RUN | TH_IDLE)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_SCHED, MACH_BLOCK),
		    reason, VM_KERNEL_UNSLIDE(continuation), 0, 0, 0);
	}

	do {
		thread_lock(self);
		new_thread = thread_select(self, processor, reason);
		thread_unlock(self);
	} while (!thread_invoke(self, new_thread, reason));

	return (self->wait_result);
}

/*
 *	thread_block:
 *
 *	Block the current thread if a wait has been asserted.
 */
wait_result_t
thread_block(
	thread_continue_t	continuation)
{
	return thread_block_reason(continuation, NULL, AST_NONE);
}

wait_result_t
thread_block_parameter(
	thread_continue_t	continuation,
	void			*parameter)
{
	return thread_block_reason(continuation, parameter, AST_NONE);
}
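/*
 * Typical usage (informal sketch; the event and continuation names below are
 * illustrative, not defined in this file):
 *
 *	static void my_continuation(void *param, wait_result_t wresult);
 *
 *	assert_wait(&some_event, THREAD_UNINT);
 *	...
 *	thread_block(my_continuation);		// or thread_block(THREAD_CONTINUE_NULL)
 *
 * With a continuation the kernel stack may be discarded and the thread later
 * resumes in my_continuation() on a fresh stack; with THREAD_CONTINUE_NULL,
 * thread_block() returns the wait result in place.
 */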
/*
 *	thread_run:
 *
 *	Switch directly from the current thread to the
 *	new thread, handing off our quantum if appropriate.
 *
 *	New thread must be runnable, and not on a run queue.
 *
 *	Called at splsched.
 */
wait_result_t
thread_run(
	thread_t		self,
	thread_continue_t	continuation,
	void			*parameter,
	thread_t		new_thread)
{
	ast_t	handoff = AST_HANDOFF;

	self->continuation = continuation;
	self->parameter = parameter;

	while (!thread_invoke(self, new_thread, handoff)) {
		processor_t processor = current_processor();

		thread_lock(self);
		new_thread = thread_select(self, processor, AST_NONE);
		thread_unlock(self);
	}

	return (self->wait_result);
}
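/*
 * Usage note (informal): thread_run() is the directed-handoff counterpart of
 * thread_block(); a caller that has just made new_thread runnable (and off
 * any run queue) can donate the remainder of its quantum, e.g.
 *
 *	thread_run(current_thread(), some_continuation, NULL, new_thread);
 *
 * where some_continuation is illustrative; THREAD_CONTINUE_NULL is also valid.
 */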
/*
 *	thread_continue:
 *
 *	Called at splsched when a thread first receives
 *	a new stack after a continuation.
 */
void
thread_continue(
	register thread_t	thread)
{
	register thread_t		self = current_thread();
	register thread_continue_t	continuation;
	register void			*parameter;

	DTRACE_SCHED(on__cpu);

	continuation = self->continuation;
	parameter = self->parameter;

	thread_dispatch(thread, self);

	self->continuation = self->parameter = NULL;

	if (thread != THREAD_NULL)
		thread_unlock(thread);

	TLOG(1, "thread_continue: calling call_continuation\n");
	call_continuation(continuation, parameter, self->wait_result);
}
void
thread_quantum_init(thread_t thread)
{
	if (thread->sched_mode == TH_MODE_REALTIME) {
		thread->quantum_remaining = thread->realtime.computation;
	} else {
		thread->quantum_remaining = SCHED(initial_quantum_size)(thread);
	}
}

#if defined(CONFIG_SCHED_TIMESHARE_CORE)

uint32_t
sched_traditional_initial_quantum_size(thread_t thread)
{
	if ((thread == THREAD_NULL) || !(thread->sched_flags & TH_SFLAG_THROTTLED))
		return std_quantum;
	else
		return bg_quantum;
}

#endif /* CONFIG_SCHED_TIMESHARE_CORE */

#if defined(CONFIG_SCHED_TRADITIONAL)

static sched_mode_t
sched_traditional_initial_thread_sched_mode(task_t parent_task)
{
	if (parent_task == kernel_task)
		return TH_MODE_FIXED;
	else
		return TH_MODE_TIMESHARE;
}

#endif /* CONFIG_SCHED_TRADITIONAL */
/*
 *	run_queue_init:
 *
 *	Initialize a run queue before first use.
 */
void
run_queue_init(
	run_queue_t	rq)
{
	int	i;

	rq->highq = IDLEPRI;
	for (i = 0; i < NRQBM; i++)
		rq->bitmap[i] = 0;
	setbit(MAXPRI - IDLEPRI, rq->bitmap);
	rq->urgency = rq->count = 0;
	for (i = 0; i < NRQS; i++)
		queue_init(&rq->queues[i]);
}
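/*
 * Informal note on the bitmap bookkeeping used by the run queue code in this
 * file: bit (MAXPRI - pri) is set in rq->bitmap whenever the queue at
 * priority "pri" is non-empty, and rq->highq caches the highest such
 * priority.  Worked example (with MAXPRI == 127 and IDLEPRI == 0, their
 * values in this kernel): if only priorities 31 and IDLEPRI have threads
 * queued, bits 96 and 127 are set, ffsbit() returns the lowest set bit
 * index, 96, and so
 *
 *	rq->highq == MAXPRI - ffsbit(rq->bitmap) == 127 - 96 == 31
 *
 * The enqueue/dequeue/remove routines below maintain this invariant with
 * setbit()/clrbit().
 */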
#if defined(CONFIG_SCHED_FAIRSHARE_CORE)

int
sched_traditional_fairshare_runq_count(void)
{
	return fs_runq.count;
}

uint64_t
sched_traditional_fairshare_runq_stats_count_sum(void)
{
	return fs_runq.runq_stats.count_sum;
}

void
sched_traditional_fairshare_enqueue(thread_t thread)
{
	queue_t	queue = &fs_runq.queue;

	simple_lock(&fs_lock);

	enqueue_tail(queue, (queue_entry_t)thread);

	thread->runq = FS_RUNQ;
	SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
	fs_runq.count++;

	simple_unlock(&fs_lock);
}

thread_t
sched_traditional_fairshare_dequeue(void)
{
	thread_t thread;

	simple_lock(&fs_lock);
	if (fs_runq.count > 0) {
		thread = (thread_t)dequeue_head(&fs_runq.queue);

		thread->runq = PROCESSOR_NULL;
		SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
		fs_runq.count--;

		simple_unlock(&fs_lock);

		return (thread);
	}
	simple_unlock(&fs_lock);

	return THREAD_NULL;
}

boolean_t
sched_traditional_fairshare_queue_remove(thread_t thread)
{
	simple_lock(&fs_lock);

	if (FS_RUNQ == thread->runq) {
		remqueue((queue_entry_t)thread);
		SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
		fs_runq.count--;

		thread->runq = PROCESSOR_NULL;
		simple_unlock(&fs_lock);
		return (TRUE);
	}
	else {
		/*
		 *	The thread left the run queue before we could
		 *	lock the run queue.
		 */
		assert(thread->runq == PROCESSOR_NULL);
		simple_unlock(&fs_lock);
		return (FALSE);
	}
}

#endif /* CONFIG_SCHED_FAIRSHARE_CORE */
/*
 *	run_queue_dequeue:
 *
 *	Perform a dequeue operation on a run queue,
 *	and return the resulting thread.
 *
 *	The run queue must be locked (see thread_run_queue_remove()
 *	for more info), and not empty.
 */
thread_t
run_queue_dequeue(
	run_queue_t	rq,
	integer_t	options)
{
	thread_t	thread;
	queue_t		queue = rq->queues + rq->highq;

	if (options & SCHED_HEADQ) {
		thread = (thread_t)dequeue_head(queue);
	}
	else {
		thread = (thread_t)dequeue_tail(queue);
	}

	thread->runq = PROCESSOR_NULL;
	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count--;
	if (SCHED(priority_is_urgent)(rq->highq)) {
		rq->urgency--; assert(rq->urgency >= 0);
	}
	if (queue_empty(queue)) {
		if (rq->highq != IDLEPRI)
			clrbit(MAXPRI - rq->highq, rq->bitmap);
		rq->highq = MAXPRI - ffsbit(rq->bitmap);
	}

	return (thread);
}
/*
 *	run_queue_enqueue:
 *
 *	Perform an enqueue operation on a run queue.
 *
 *	The run queue must be locked (see thread_run_queue_remove()
 *	for more info).
 */
boolean_t
run_queue_enqueue(
	run_queue_t	rq,
	thread_t	thread,
	integer_t	options)
{
	queue_t		queue = rq->queues + thread->sched_pri;
	boolean_t	result = FALSE;

	if (queue_empty(queue)) {
		enqueue_tail(queue, (queue_entry_t)thread);

		setbit(MAXPRI - thread->sched_pri, rq->bitmap);
		if (thread->sched_pri > rq->highq) {
			rq->highq = thread->sched_pri;
			result = TRUE;
		}
	} else {
		if (options & SCHED_TAILQ)
			enqueue_tail(queue, (queue_entry_t)thread);
		else
			enqueue_head(queue, (queue_entry_t)thread);
	}
	if (SCHED(priority_is_urgent)(thread->sched_pri))
		rq->urgency++;
	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count++;

	return (result);
}
/*
 *	run_queue_remove:
 *
 *	Remove a specific thread from a runqueue.
 *
 *	The run queue must be locked.
 */
void
run_queue_remove(
	run_queue_t	rq,
	thread_t	thread)
{
	remqueue((queue_entry_t)thread);
	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count--;
	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
		rq->urgency--; assert(rq->urgency >= 0);
	}

	if (queue_empty(rq->queues + thread->sched_pri)) {
		/* update run queue status */
		if (thread->sched_pri != IDLEPRI)
			clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
		rq->highq = MAXPRI - ffsbit(rq->bitmap);
	}

	thread->runq = PROCESSOR_NULL;
}
/*
 *	Dispatch a thread for round-robin execution.
 *
 *	Thread must be locked.  Associated pset must
 *	be locked, and is returned unlocked.
 */
static void
fairshare_setrun(
	processor_t	processor,
	thread_t	thread)
{
	processor_set_t	pset = processor->processor_set;

	thread->chosen_processor = processor;

	SCHED(fairshare_enqueue)(thread);

	pset_unlock(pset);

	if (processor != current_processor())
		machine_signal_idle(processor);
}
/*
 *	realtime_queue_insert:
 *
 *	Enqueue a thread for realtime execution.
 */
static boolean_t
realtime_queue_insert(
	thread_t	thread)
{
	queue_t		queue = &rt_runq.queue;
	uint64_t	deadline = thread->realtime.deadline;
	boolean_t	preempt = FALSE;

	simple_lock(&rt_lock);

	if (queue_empty(queue)) {
		enqueue_tail(queue, (queue_entry_t)thread);
		preempt = TRUE;
	}
	else {
		register thread_t entry = (thread_t)queue_first(queue);

		while (TRUE) {
			if (queue_end(queue, (queue_entry_t)entry) ||
			    deadline < entry->realtime.deadline) {
				entry = (thread_t)queue_prev((queue_entry_t)entry);
				break;
			}

			entry = (thread_t)queue_next((queue_entry_t)entry);
		}

		if ((queue_entry_t)entry == queue)
			preempt = TRUE;

		insque((queue_entry_t)thread, (queue_entry_t)entry);
	}

	thread->runq = RT_RUNQ;
	SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
	rt_runq.count++;

	simple_unlock(&rt_lock);

	return (preempt);
}
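/*
 * Informal example of the ordering above: rt_runq.queue is kept sorted by
 * increasing realtime.deadline (earliest deadline first), so with queued
 * deadlines {100, 250, 400} an inserted thread with deadline 300 lands
 * between 250 and 400.  "preempt" tracks whether the new thread became the
 * head of the queue, i.e. whether it now has the earliest deadline of all
 * queued realtime threads.
 */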
/*
 *	Dispatch a thread for realtime execution.
 *
 *	Thread must be locked.  Associated pset must
 *	be locked, and is returned unlocked.
 */
static void
realtime_setrun(
	processor_t	processor,
	thread_t	thread)
{
	processor_set_t	pset = processor->processor_set;
	ast_t		preempt;

	boolean_t do_signal_idle = FALSE, do_cause_ast = FALSE;

	thread->chosen_processor = processor;

	/* <rdar://problem/15102234> */
	assert(thread->bound_processor == PROCESSOR_NULL);

	/*
	 *	Dispatch directly onto idle processor.
	 */
	if ((thread->bound_processor == processor)
	    && processor->state == PROCESSOR_IDLE) {
		remqueue((queue_entry_t)processor);
		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);

		processor->next_thread = thread;
		processor->current_pri = thread->sched_pri;
		processor->current_thmode = thread->sched_mode;
		processor->current_sfi_class = thread->sfi_class;
		processor->deadline = thread->realtime.deadline;
		processor->state = PROCESSOR_DISPATCHING;

		if (processor != current_processor()) {
			if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
				/* cleared on exit from main processor_idle() loop */
				pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
				do_signal_idle = TRUE;
			}
		}
		pset_unlock(pset);

		if (do_signal_idle) {
			machine_signal_idle(processor);
		}
		return;
	}

	if (processor->current_pri < BASEPRI_RTQUEUES)
		preempt = (AST_PREEMPT | AST_URGENT);
	else if (thread->realtime.deadline < processor->deadline)
		preempt = (AST_PREEMPT | AST_URGENT);
	else
		preempt = AST_NONE;

	realtime_queue_insert(thread);

	if (preempt != AST_NONE) {
		if (processor->state == PROCESSOR_IDLE) {
			remqueue((queue_entry_t)processor);
			enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
			processor->next_thread = THREAD_NULL;
			processor->current_pri = thread->sched_pri;
			processor->current_thmode = thread->sched_mode;
			processor->current_sfi_class = thread->sfi_class;
			processor->deadline = thread->realtime.deadline;
			processor->state = PROCESSOR_DISPATCHING;
			if (processor == current_processor()) {
				ast_on(preempt);
			} else {
				if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
					/* cleared on exit from main processor_idle() loop */
					pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
					do_signal_idle = TRUE;
				}
			}
		} else if (processor->state == PROCESSOR_DISPATCHING) {
			if ((processor->next_thread == THREAD_NULL) && ((processor->current_pri < thread->sched_pri) || (processor->deadline > thread->realtime.deadline))) {
				processor->current_pri = thread->sched_pri;
				processor->current_thmode = thread->sched_mode;
				processor->current_sfi_class = thread->sfi_class;
				processor->deadline = thread->realtime.deadline;
			}
		} else {
			if (processor == current_processor()) {
				ast_on(preempt);
			} else {
				if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
					/* cleared after IPI causes csw_check() to be called */
					pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
					do_cause_ast = TRUE;
				}
			}
		}
	} else {
		/* Selected processor was too busy, just keep thread enqueued and let other processors drain it naturally. */
	}

	pset_unlock(pset);

	if (do_signal_idle) {
		machine_signal_idle(processor);
	} else if (do_cause_ast) {
		cause_ast_check(processor);
	}
}
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

priority_is_urgent(int priority)
{
	return testbit(priority, sched_preempt_pri) ? TRUE : FALSE;
}

#endif /* CONFIG_SCHED_TIMESHARE_CORE */
#if defined(CONFIG_SCHED_TRADITIONAL)

/*
 *	processor_enqueue:
 *
 *	Enqueue thread on a processor run queue.  Thread must be locked,
 *	and not already be on a run queue.
 *
 *	Returns TRUE if a preemption is indicated based on the state
 *	of the run queue.
 *
 *	The run queue must be locked (see thread_run_queue_remove()
 *	for more info).
 */
static boolean_t
processor_enqueue(
	processor_t	processor,
	thread_t	thread,
	integer_t	options)
{
	run_queue_t	rq = runq_for_processor(processor);
	boolean_t	result;

	result = run_queue_enqueue(rq, thread, options);
	thread->runq = processor;
	runq_consider_incr_bound_count(processor, thread);

	return (result);
}

#endif /* CONFIG_SCHED_TRADITIONAL */
/*
 *	Dispatch a thread for execution on a
 *	processor.
 *
 *	Thread must be locked.  Associated pset must
 *	be locked, and is returned unlocked.
 */
static void
processor_setrun(
	processor_t	processor,
	thread_t	thread,
	integer_t	options)
{
	processor_set_t	pset = processor->processor_set;
	ast_t		preempt;
	enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing;

	boolean_t do_signal_idle = FALSE, do_cause_ast = FALSE;

	thread->chosen_processor = processor;

	/*
	 *	Dispatch directly onto idle processor.
	 */
	if ((SCHED(direct_dispatch_to_idle_processors) ||
	     thread->bound_processor == processor)
	    && processor->state == PROCESSOR_IDLE) {
		remqueue((queue_entry_t)processor);
		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);

		processor->next_thread = thread;
		processor->current_pri = thread->sched_pri;
		processor->current_thmode = thread->sched_mode;
		processor->current_sfi_class = thread->sfi_class;
		processor->deadline = UINT64_MAX;
		processor->state = PROCESSOR_DISPATCHING;

		if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
			/* cleared on exit from main processor_idle() loop */
			pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
			do_signal_idle = TRUE;
		}

		pset_unlock(pset);
		if (do_signal_idle) {
			machine_signal_idle(processor);
		}
		return;
	}

	/*
	 *	Set preemption mode.
	 */
	if (SCHED(priority_is_urgent)(thread->sched_pri) && thread->sched_pri > processor->current_pri)
		preempt = (AST_PREEMPT | AST_URGENT);
	else if (processor->active_thread && thread_eager_preemption(processor->active_thread))
		preempt = (AST_PREEMPT | AST_URGENT);
	else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->priority)) {
		if (SCHED(priority_is_urgent)(thread->priority) && thread->sched_pri > processor->current_pri) {
			preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
		} else {
			preempt = AST_NONE;
		}
	} else
		preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;

	SCHED(processor_enqueue)(processor, thread, options);

	if (preempt != AST_NONE) {
		if (processor->state == PROCESSOR_IDLE) {
			remqueue((queue_entry_t)processor);
			enqueue_tail(&pset->active_queue, (queue_entry_t)processor);

			processor->next_thread = THREAD_NULL;
			processor->current_pri = thread->sched_pri;
			processor->current_thmode = thread->sched_mode;
			processor->current_sfi_class = thread->sfi_class;
			processor->deadline = UINT64_MAX;
			processor->state = PROCESSOR_DISPATCHING;

			ipi_action = eExitIdle;
		} else if (processor->state == PROCESSOR_DISPATCHING) {
			if ((processor->next_thread == THREAD_NULL) && (processor->current_pri < thread->sched_pri)) {
				processor->current_pri = thread->sched_pri;
				processor->current_thmode = thread->sched_mode;
				processor->current_sfi_class = thread->sfi_class;
				processor->deadline = UINT64_MAX;
			}
		} else if ((processor->state == PROCESSOR_RUNNING ||
			    processor->state == PROCESSOR_SHUTDOWN) &&
			   (thread->sched_pri >= processor->current_pri ||
			    processor->current_thmode == TH_MODE_FAIRSHARE)) {
			ipi_action = eInterruptRunning;
		}
	} else {
		/*
		 * New thread is not important enough to preempt what is running, but
		 * special processor states may need special handling
		 */
		if (processor->state == PROCESSOR_SHUTDOWN &&
		    thread->sched_pri >= processor->current_pri) {
			ipi_action = eInterruptRunning;
		} else if (processor->state == PROCESSOR_IDLE &&
			   processor != current_processor()) {
			remqueue((queue_entry_t)processor);
			enqueue_tail(&pset->active_queue, (queue_entry_t)processor);

			processor->next_thread = THREAD_NULL;
			processor->current_pri = thread->sched_pri;
			processor->current_thmode = thread->sched_mode;
			processor->current_sfi_class = thread->sfi_class;
			processor->deadline = UINT64_MAX;
			processor->state = PROCESSOR_DISPATCHING;

			ipi_action = eExitIdle;
		}
	}

	switch (ipi_action) {
		case eDoNothing:
			break;
		case eExitIdle:
			if (processor == current_processor()) {
				if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE)
					ast_on(preempt);
			} else {
				if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
					/* cleared on exit from main processor_idle() loop */
					pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
					do_signal_idle = TRUE;
				}
			}
			break;
		case eInterruptRunning:
			if (processor == current_processor()) {
				if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE)
					ast_on(preempt);
			} else {
				if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
					/* cleared after IPI causes csw_check() to be called */
					pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
					do_cause_ast = TRUE;
				}
			}
			break;
	}

	pset_unlock(pset);

	if (do_signal_idle) {
		machine_signal_idle(processor);
	} else if (do_cause_ast) {
		cause_ast_check(processor);
	}
}
#if defined(CONFIG_SCHED_TRADITIONAL)

static boolean_t
processor_queue_empty(processor_t processor)
{
	return runq_for_processor(processor)->count == 0;
}

static boolean_t
sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor)
{
	processor_set_t	pset = processor->processor_set;
	int		count = runq_for_processor(processor)->count;

	/*
	 * The pset runq contains the count of all runnable threads
	 * for all processors in the pset. However, for threads that
	 * are bound to another processor, the current "processor"
	 * is not eligible to execute the thread. So we only
	 * include bound threads that are bound to the current
	 * "processor". This allows the processor to idle when the
	 * count of eligible threads drops to 0, even if there's
	 * a runnable thread bound to a different processor in the
	 * pset.
	 */
	count -= pset->pset_runq_bound_count;
	count += processor->runq_bound_count;

	return count == 0;
}

static ast_t
processor_csw_check(processor_t processor)
{
	run_queue_t	runq;
	boolean_t	has_higher;

	assert(processor->active_thread != NULL);

	runq = runq_for_processor(processor);
	if (first_timeslice(processor)) {
		has_higher = (runq->highq > processor->current_pri);
	} else {
		has_higher = (runq->highq >= processor->current_pri);
	}

	if (has_higher) {
		if (runq->urgency > 0)
			return (AST_PREEMPT | AST_URGENT);

		if (processor->active_thread && thread_eager_preemption(processor->active_thread))
			return (AST_PREEMPT | AST_URGENT);

		return AST_PREEMPT;
	}

	return AST_NONE;
}

static boolean_t
processor_queue_has_priority(processor_t	processor,
			     int		priority,
			     boolean_t		gte)
{
	if (gte)
		return runq_for_processor(processor)->highq >= priority;
	else
		return runq_for_processor(processor)->highq > priority;
}

static boolean_t
should_current_thread_rechoose_processor(processor_t processor)
{
	return (processor->current_pri < BASEPRI_RTQUEUES
		&& processor->processor_primary != processor);
}

static int
sched_traditional_processor_runq_count(processor_t processor)
{
	return runq_for_processor(processor)->count;
}

static uint64_t
sched_traditional_processor_runq_stats_count_sum(processor_t processor)
{
	return runq_for_processor(processor)->runq_stats.count_sum;
}

static uint64_t
sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor)
{
	if (processor->cpu_id == processor->processor_set->cpu_set_low)
		return runq_for_processor(processor)->runq_stats.count_sum;
	else
		return 0ULL;
}

static int
sched_traditional_processor_bound_count(processor_t processor)
{
	return processor->runq_bound_count;
}

#endif /* CONFIG_SCHED_TRADITIONAL */
/*
 *	choose_next_pset:
 *
 *	Return the next sibling pset containing
 *	available processors.
 *
 *	Returns the original pset if none other is
 *	available.
 */
static processor_set_t
choose_next_pset(
	processor_set_t	pset)
{
	processor_set_t	nset = pset;

	do {
		nset = next_pset(nset);
	} while (nset->online_processor_count < 1 && nset != pset);

	return (nset);
}
/*
 *	choose_processor:
 *
 *	Choose a processor for the thread, beginning at
 *	the pset.  Accepts an optional processor hint in
 *	the pset.
 *
 *	Returns a processor, possibly from a different pset.
 *
 *	The thread must be locked.  The pset must be locked,
 *	and the resulting pset is locked on return.
 */
static processor_t
choose_processor(
	processor_set_t	pset,
	processor_t	processor,
	thread_t	thread)
{
	processor_set_t	nset, cset = pset;

	/*
	 * Prefer the hinted processor, when appropriate.
	 */

	/* Fold last processor hint from secondary processor to its primary */
	if (processor != PROCESSOR_NULL) {
		processor = processor->processor_primary;
	}

	/*
	 * Only consult platform layer if pset is active, which
	 * it may not be in some cases when a multi-set system
	 * is going to sleep.
	 */
	if (pset->online_processor_count) {
		if ((processor == PROCESSOR_NULL) || (processor->processor_set == pset && processor->state == PROCESSOR_IDLE)) {
			processor_t mc_processor = machine_choose_processor(pset, processor);
			if (mc_processor != PROCESSOR_NULL)
				processor = mc_processor->processor_primary;
		}
	}

	/*
	 * At this point, we may have a processor hint, and we may have
	 * an initial starting pset. If the hint is not in the pset, or
	 * if the hint is for a processor in an invalid state, discard
	 * the hint.
	 */
	if (processor != PROCESSOR_NULL) {
		if (processor->processor_set != pset) {
			processor = PROCESSOR_NULL;
		} else {
			switch (processor->state) {
			case PROCESSOR_START:
			case PROCESSOR_SHUTDOWN:
			case PROCESSOR_OFF_LINE:
				/*
				 * Hint is for a processor that cannot support running new threads.
				 */
				processor = PROCESSOR_NULL;
				break;
			case PROCESSOR_IDLE:
				/*
				 * Hint is for an idle processor. Assume it is no worse than any other
				 * idle processor. The platform layer had an opportunity to provide
				 * the "least cost idle" processor above.
				 */
				return (processor);
			case PROCESSOR_RUNNING:
			case PROCESSOR_DISPATCHING:
				/*
				 * Hint is for an active CPU. This fast-path allows
				 * realtime threads to preempt non-realtime threads
				 * to regain their previous executing processor.
				 */
				if ((thread->sched_pri >= BASEPRI_RTQUEUES) &&
				    (processor->current_pri < BASEPRI_RTQUEUES))
					return (processor);

				/* Otherwise, use hint as part of search below */
				break;
			default:
				processor = PROCESSOR_NULL;
				break;
			}
		}
	}

	/*
	 * Iterate through the processor sets to locate
	 * an appropriate processor. Seed results with
	 * a last-processor hint, if available, so that
	 * a search must find something strictly better
	 * to replace it.
	 *
	 * A primary/secondary pair of SMT processors are
	 * "unpaired" if the primary is busy but its
	 * corresponding secondary is idle (so the physical
	 * core has full use of its resources).
	 */

	integer_t lowest_priority = MAXPRI + 1;
	integer_t lowest_unpaired_primary_priority = MAXPRI + 1;
	integer_t lowest_count = INT_MAX;
	uint64_t  furthest_deadline = 1;
	processor_t lp_processor = PROCESSOR_NULL;
	processor_t lp_unpaired_primary_processor = PROCESSOR_NULL;
	processor_t lp_unpaired_secondary_processor = PROCESSOR_NULL;
	processor_t lc_processor = PROCESSOR_NULL;
	processor_t fd_processor = PROCESSOR_NULL;

	if (processor != PROCESSOR_NULL) {
		/* All other states should be enumerated above. */
		assert(processor->state == PROCESSOR_RUNNING || processor->state == PROCESSOR_DISPATCHING);

		lowest_priority = processor->current_pri;
		lp_processor = processor;

		if (processor->current_pri >= BASEPRI_RTQUEUES) {
			furthest_deadline = processor->deadline;
			fd_processor = processor;
		}

		lowest_count = SCHED(processor_runq_count)(processor);
		lc_processor = processor;
	}

	do {
		/*
		 * Choose an idle processor, in pset traversal order
		 */
		if (!queue_empty(&cset->idle_queue))
			return ((processor_t)queue_first(&cset->idle_queue));

		/*
		 * Otherwise, enumerate active and idle processors to find candidates
		 * with lower priority/etc.
		 */

		processor = (processor_t)queue_first(&cset->active_queue);
		while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {

			integer_t cpri = processor->current_pri;
			if (cpri < lowest_priority) {
				lowest_priority = cpri;
				lp_processor = processor;
			}

			if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) {
				furthest_deadline = processor->deadline;
				fd_processor = processor;
			}

			integer_t ccount = SCHED(processor_runq_count)(processor);
			if (ccount < lowest_count) {
				lowest_count = ccount;
				lc_processor = processor;
			}

			processor = (processor_t)queue_next((queue_entry_t)processor);
		}

		/*
		 * For SMT configs, these idle secondary processors must have active primary. Otherwise
		 * the idle primary would have short-circuited the loop above
		 */
		processor = (processor_t)queue_first(&cset->idle_secondary_queue);
		while (!queue_end(&cset->idle_secondary_queue, (queue_entry_t)processor)) {
			processor_t cprimary = processor->processor_primary;

			/* If the primary processor is offline or starting up, it's not a candidate for this path */
			if (cprimary->state == PROCESSOR_RUNNING || cprimary->state == PROCESSOR_DISPATCHING) {
				integer_t primary_pri = cprimary->current_pri;

				if (primary_pri < lowest_unpaired_primary_priority) {
					lowest_unpaired_primary_priority = primary_pri;
					lp_unpaired_primary_processor = cprimary;
					lp_unpaired_secondary_processor = processor;
				}
			}

			processor = (processor_t)queue_next((queue_entry_t)processor);
		}

		if (thread->sched_pri >= BASEPRI_RTQUEUES) {
			/*
			 * For realtime threads, the most important aspect is
			 * scheduling latency, so we attempt to assign threads
			 * to good preemption candidates (assuming an idle primary
			 * processor was not available above).
			 */
			if (thread->sched_pri > lowest_unpaired_primary_priority) {
				/* Move to end of active queue so that the next thread doesn't also pick it */
				remqueue((queue_entry_t)lp_unpaired_primary_processor);
				enqueue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor);
				return lp_unpaired_primary_processor;
			}
			if (thread->sched_pri > lowest_priority) {
				/* Move to end of active queue so that the next thread doesn't also pick it */
				remqueue((queue_entry_t)lp_processor);
				enqueue_tail(&cset->active_queue, (queue_entry_t)lp_processor);
				return lp_processor;
			}
			if (thread->realtime.deadline < furthest_deadline)
				return fd_processor;

			/*
			 * If all primary and secondary CPUs are busy with realtime
			 * threads with deadlines earlier than us, move on to next
			 * pset.
			 */
		} else {
			if (thread->sched_pri > lowest_unpaired_primary_priority) {
				/* Move to end of active queue so that the next thread doesn't also pick it */
				remqueue((queue_entry_t)lp_unpaired_primary_processor);
				enqueue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor);
				return lp_unpaired_primary_processor;
			}
			if (thread->sched_pri > lowest_priority) {
				/* Move to end of active queue so that the next thread doesn't also pick it */
				remqueue((queue_entry_t)lp_processor);
				enqueue_tail(&cset->active_queue, (queue_entry_t)lp_processor);
				return lp_processor;
			}

			/*
			 * If all primary processors in this pset are running a higher
			 * priority thread, move on to next pset. Only when we have
			 * exhausted this search do we fall back to other heuristics.
			 */
		}

		/*
		 * Move onto the next processor set.
		 */
		nset = next_pset(cset);
	} while (nset != pset);

	/*
	 * Make sure that we pick a running processor,
	 * and that the correct processor set is locked.
	 * Since we may have unlocked the candidate processor's
	 * pset, it may have changed state.
	 *
	 * All primary processors are running a higher priority
	 * thread, so the only options left are enqueuing on
	 * the secondary processor that would perturb the least priority
	 * primary, or the least busy primary.
	 */
	do {
		/* lowest_priority is evaluated in the main loops above */
		if (lp_unpaired_secondary_processor != PROCESSOR_NULL) {
			processor = lp_unpaired_secondary_processor;
			lp_unpaired_secondary_processor = PROCESSOR_NULL;
		} else if (lc_processor != PROCESSOR_NULL) {
			processor = lc_processor;
			lc_processor = PROCESSOR_NULL;
		} else {
			/*
			 * All processors are executing higher
			 * priority threads, and the lowest_count
			 * candidate was not usable
			 */
			processor = master_processor;
		}

		/*
		 * Check that the correct processor set is
		 * returned locked.
		 */
		if (cset != processor->processor_set) {
			pset_unlock(cset);
			cset = processor->processor_set;
			pset_lock(cset);
		}

		/*
		 * We must verify that the chosen processor is still available.
		 * master_processor is an exception, since we may need to preempt
		 * a running thread on it during processor shutdown (for sleep),
		 * and that thread needs to be enqueued on its runqueue to run
		 * when the processor is restarted.
		 */
		if (processor != master_processor && (processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE))
			processor = PROCESSOR_NULL;

	} while (processor == PROCESSOR_NULL);

	return (processor);
}
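/*
 * Informal summary of the selection order implemented above: an idle primary
 * in the current pset wins immediately; otherwise the search remembers the
 * lowest-priority running processor, the lowest-priority "unpaired" primary
 * (one whose SMT sibling is idle), the least-loaded processor, and the
 * realtime processor with the furthest deadline, then widens to sibling
 * psets.  Only when every candidate is running higher-priority work does it
 * fall back to an idle secondary, the least-busy processor, or finally
 * master_processor.
 */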
/*
 *	thread_setrun:
 *
 *	Dispatch thread for execution, onto an idle
 *	processor or run queue, and signal a preemption
 *	as appropriate.
 *
 *	Thread must be locked.
 */
void
thread_setrun(
	thread_t	thread,
	integer_t	options)
{
	processor_t	processor;
	processor_set_t	pset;

	assert(thread_runnable(thread));

	/*
	 *	Update priority if needed.
	 */
	if (SCHED(can_update_priority)(thread))
		SCHED(update_priority)(thread);

	thread->sfi_class = sfi_thread_classify(thread);

	assert(thread->runq == PROCESSOR_NULL);

	if (thread->bound_processor == PROCESSOR_NULL) {
		if (thread->affinity_set != AFFINITY_SET_NULL) {
			/*
			 * Use affinity set policy hint.
			 */
			pset = thread->affinity_set->aset_pset;
			pset_lock(pset);

			processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);

			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
			    (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
		} else if (thread->last_processor != PROCESSOR_NULL) {
			/*
			 *	Simple (last processor) affinity case.
			 */
			processor = thread->last_processor;
			pset = processor->processor_set;
			pset_lock(pset);
			processor = SCHED(choose_processor)(pset, processor, thread);

			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
			    (uintptr_t)thread_tid(thread), thread->last_processor->cpu_id, processor->cpu_id, processor->state, 0);
		} else {
			/*
			 *	Utilize a per task hint to spread threads
			 *	among the available processor sets.
			 */
			task_t	task = thread->task;

			pset = task->pset_hint;
			if (pset == PROCESSOR_SET_NULL)
				pset = current_processor()->processor_set;

			pset = choose_next_pset(pset);
			pset_lock(pset);

			processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
			task->pset_hint = processor->processor_set;

			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
			    (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
		}
	} else {
		/*
		 *	Unconditionally dispatch on the processor.
		 */
		processor = thread->bound_processor;
		pset = processor->processor_set;
		pset_lock(pset);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
		    (uintptr_t)thread_tid(thread), (uintptr_t)-2, processor->cpu_id, processor->state, 0);
	}

	/*
	 *	Dispatch the thread on the chosen processor.
	 *	TODO: This should be based on sched_mode, not sched_pri
	 */
	if (thread->sched_pri >= BASEPRI_RTQUEUES)
		realtime_setrun(processor, thread);
	else if (thread->sched_mode == TH_MODE_FAIRSHARE)
		fairshare_setrun(processor, thread);
	else
		processor_setrun(processor, thread, options);
}

processor_set_t
task_choose_pset(
	task_t	task)
{
	processor_set_t	pset = task->pset_hint;

	if (pset != PROCESSOR_SET_NULL)
		pset = choose_next_pset(pset);

	return (pset);
}
#if defined(CONFIG_SCHED_TRADITIONAL)

/*
 *	processor_queue_shutdown:
 *
 *	Shutdown a processor run queue by
 *	re-dispatching non-bound threads.
 *
 *	Associated pset must be locked, and is
 *	returned unlocked.
 */
void
processor_queue_shutdown(
	processor_t	processor)
{
	processor_set_t	pset = processor->processor_set;
	run_queue_t	rq = runq_for_processor(processor);
	queue_t		queue = rq->queues + rq->highq;
	int		pri = rq->highq, count = rq->count;
	thread_t	next, thread;
	queue_head_t	tqueue;

	queue_init(&tqueue);

	while (count > 0) {
		thread = (thread_t)queue_first(queue);
		while (!queue_end(queue, (queue_entry_t)thread)) {
			next = (thread_t)queue_next((queue_entry_t)thread);

			if (thread->bound_processor == PROCESSOR_NULL) {
				remqueue((queue_entry_t)thread);

				thread->runq = PROCESSOR_NULL;
				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
				runq_consider_decr_bound_count(processor, thread);
				rq->count--;
				if (SCHED(priority_is_urgent)(pri)) {
					rq->urgency--; assert(rq->urgency >= 0);
				}
				if (queue_empty(queue)) {
					clrbit(MAXPRI - pri, rq->bitmap);
					rq->highq = MAXPRI - ffsbit(rq->bitmap);
				}

				enqueue_tail(&tqueue, (queue_entry_t)thread);
			}
			count--;

			thread = next;
		}

		queue--; pri--;
	}

	pset_unlock(pset);

	while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
		thread_lock(thread);

		thread_setrun(thread, SCHED_TAILQ);

		thread_unlock(thread);
	}
}

#endif /* CONFIG_SCHED_TRADITIONAL */
/*
 *	csw_check:
 *
 *	Check for a preemption point in
 *	the current context.
 *
 *	Called at splsched with thread locked.
 */
ast_t
csw_check(
	processor_t	processor,
	ast_t		check_reason)
{
	processor_set_t	pset = processor->processor_set;
	ast_t		result;

	pset_lock(pset);

	/* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */
	pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id);

	result = csw_check_locked(processor, pset, check_reason);

	pset_unlock(pset);

	return result;
}

/*
 * Check for preemption at splsched with
 * pset and thread locked
 */
ast_t
csw_check_locked(
	processor_t		processor,
	processor_set_t		pset __unused,
	ast_t			check_reason)
{
	ast_t		result;
	thread_t	thread = processor->active_thread;

	if (first_timeslice(processor)) {
		if (rt_runq.count > 0)
			return (check_reason | AST_PREEMPT | AST_URGENT);
	}
	else {
		if (rt_runq.count > 0) {
			if (BASEPRI_RTQUEUES > processor->current_pri)
				return (check_reason | AST_PREEMPT | AST_URGENT);
			else
				return (check_reason | AST_PREEMPT);
		}
	}

	result = SCHED(processor_csw_check)(processor);
	if (result != AST_NONE)
		return (check_reason | result);

	if (SCHED(should_current_thread_rechoose_processor)(processor))
		return (check_reason | AST_PREEMPT);

	if (thread->state & TH_SUSP)
		return (check_reason | AST_PREEMPT);

	/*
	 * Current thread may not need to be preempted, but maybe needs
	 * an SFI wait?
	 */
	result = sfi_thread_needs_ast(thread, NULL);
	if (result != AST_NONE)
		return (check_reason | result);

	return (AST_NONE);
}
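/*
 * Usage note (informal sketch, not a quote of any caller in this file):
 * callers typically run at splsched and fold the returned AST bits into the
 * pending set, along the lines of
 *
 *	ast_t preempt = csw_check(processor, AST_NONE);
 *	if (preempt != AST_NONE)
 *		ast_on(preempt);
 */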
/*
 *	set_sched_pri:
 *
 *	Set the scheduled priority of the specified thread.
 *
 *	This may cause the thread to change queues.
 *
 *	Thread must be locked.
 */
void
set_sched_pri(
	thread_t	thread,
	int		priority)
{
	boolean_t	removed = thread_run_queue_remove(thread);
	int		curgency, nurgency;
	uint64_t	urgency_param1, urgency_param2;
	thread_t	cthread = current_thread();

	if (thread == cthread) {
		curgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2);
	}

	thread->sched_pri = priority;

	if (thread == cthread) {
		nurgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2);
		/* set_sched_pri doesn't alter RT params. We expect direct base priority/QoS
		 * class alterations from user space to occur relatively infrequently, hence
		 * those are lazily handled. QoS classes have distinct priority bands, and QoS
		 * inheritance is expected to involve priority changes.
		 */
		if (nurgency != curgency) {
			thread_tell_urgency(nurgency, urgency_param1, urgency_param2, thread);
		}
	}

	if (removed)
		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
	else if (thread->state & TH_RUN) {
		processor_t	processor = thread->last_processor;

		if (thread == current_thread()) {
			ast_t	preempt;

			processor->current_pri = priority;
			processor->current_thmode = thread->sched_mode;
			processor->current_sfi_class = thread->sfi_class = sfi_thread_classify(thread);
			if ((preempt = csw_check(processor, AST_NONE)) != AST_NONE)
				ast_on(preempt);
		}
		else if (processor != PROCESSOR_NULL &&
			 processor->active_thread == thread)
			cause_ast_check(processor);
	}
}
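/*
 * Usage note (informal): callers are expected to hold the thread lock at
 * splsched, e.g.
 *
 *	spl_t s = splsched();
 *	thread_lock(thread);
 *	set_sched_pri(thread, DEPRESSPRI);	// DEPRESSPRI is just an illustrative value
 *	thread_unlock(thread);
 *	splx(s);
 *
 * The function accepts any scheduled priority in [MINPRI, MAXPRI].
 */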
	if (rq != thread->runq)
		panic("run_queue_check: thread runq");

	if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
		panic("run_queue_check: thread sched_pri");

	q = &rq->queues[thread->sched_pri];
	qe = queue_first(q);
	while (!queue_end(q, qe)) {
		if (qe == (queue_entry_t)thread)
			return;

		qe = queue_next(qe);
	}

	panic("run_queue_check: end");
#if defined(CONFIG_SCHED_TRADITIONAL)

/*
 *	Locks the runqueue itself.
 *
 *	Thread must be locked.
 */
static boolean_t
processor_queue_remove(
	processor_t	processor,
	thread_t	thread)
{
	void *		rqlock;
	run_queue_t	rq;

	rqlock = &processor->processor_set->sched_lock;
	rq = runq_for_processor(processor);

	simple_lock(rqlock);
	if (processor == thread->runq) {
		/*
		 *	Thread is on a run queue and we have a lock on
		 *	that run queue.
		 */
		runq_consider_decr_bound_count(processor, thread);
		run_queue_remove(rq, thread);
	}
	else {
		/*
		 *	The thread left the run queue before we could
		 *	lock the run queue.
		 */
		assert(thread->runq == PROCESSOR_NULL);
		processor = PROCESSOR_NULL;
	}

	simple_unlock(rqlock);

	return (processor != PROCESSOR_NULL);
}

#endif /* CONFIG_SCHED_TRADITIONAL */
/*
 *	thread_run_queue_remove:
 *
 *	Remove a thread from its current run queue and
 *	return TRUE if successful.
 *
 *	Thread must be locked.
 *
 *	If thread->runq is PROCESSOR_NULL, the thread will not re-enter the
 *	run queues because the caller locked the thread.  Otherwise
 *	the thread is on a run queue, but could be chosen for dispatch
 *	and removed by another processor under a different lock, which
 *	will set thread->runq to PROCESSOR_NULL.
 *
 *	Hence the thread select path must not rely on anything that could
 *	be changed under the thread lock after calling this function,
 *	most importantly thread->sched_pri.
 */
boolean_t
thread_run_queue_remove(
	thread_t	thread)
{
	boolean_t	removed = FALSE;
	processor_t	processor = thread->runq;

	if ((thread->state & (TH_RUN|TH_WAIT)) == TH_WAIT) {
		/* Thread isn't runnable */
		assert(thread->runq == PROCESSOR_NULL);
		return FALSE;
	}

	if (processor == PROCESSOR_NULL) {
		/*
		 * The thread is either not on the runq,
		 * or is in the midst of being removed from the runq.
		 *
		 * runq is set to NULL under the pset lock, not the thread
		 * lock, so the thread may still be in the process of being dequeued
		 * from the runq. It will wait in invoke for the thread lock to be
		 * dropped.
		 */
		return FALSE;
	}

	if (thread->sched_mode == TH_MODE_FAIRSHARE) {
		return SCHED(fairshare_queue_remove)(thread);
	}

	if (thread->sched_pri < BASEPRI_RTQUEUES) {
		return SCHED(processor_queue_remove)(processor, thread);
	}

	simple_lock(&rt_lock);

	if (thread->runq != PROCESSOR_NULL) {
		/*
		 *	Thread is on a run queue and we have a lock on
		 *	that run queue.
		 */
		assert(thread->runq == RT_RUNQ);

		remqueue((queue_entry_t)thread);
		SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
		rt_runq.count--;

		thread->runq = PROCESSOR_NULL;

		removed = TRUE;
	}

	simple_unlock(&rt_lock);

	return (removed);
}
4390 * steal_processor_thread:
4392 * Locate a thread to steal from the processor and
4395 * Associated pset must be locked. Returns THREAD_NULL
4399 steal_processor_thread(
4400 processor_t processor
)
4402 run_queue_t rq
= runq_for_processor(processor
);
4403 queue_t queue
= rq
->queues
+ rq
->highq
;
4404 int pri
= rq
->highq
, count
= rq
->count
;
4408 thread
= (thread_t
)queue_first(queue
);
4409 while (!queue_end(queue
, (queue_entry_t
)thread
)) {
4410 if (thread
->bound_processor
== PROCESSOR_NULL
) {
4411 remqueue((queue_entry_t
)thread
);
4413 thread
->runq
= PROCESSOR_NULL
;
4414 SCHED_STATS_RUNQ_CHANGE(&rq
->runq_stats
, rq
->count
);
4415 runq_consider_decr_bound_count(processor
, thread
);
4417 if (SCHED(priority_is_urgent
)(pri
)) {
4418 rq
->urgency
--; assert(rq
->urgency
>= 0);
4420 if (queue_empty(queue
)) {
4422 clrbit(MAXPRI
- pri
, rq
->bitmap
);
4423 rq
->highq
= MAXPRI
- ffsbit(rq
->bitmap
);
4430 thread
= (thread_t
)queue_next((queue_entry_t
)thread
);
4436 return (THREAD_NULL
);
4440 * Locate and steal a thread, beginning
4443 * The pset must be locked, and is returned
4446 * Returns the stolen thread, or THREAD_NULL on
4451 processor_set_t pset
)
4453 processor_set_t nset
, cset
= pset
;
4454 processor_t processor
;
4458 processor
= (processor_t
)queue_first(&cset
->active_queue
);
4459 while (!queue_end(&cset
->active_queue
, (queue_entry_t
)processor
)) {
4460 if (runq_for_processor(processor
)->count
> 0) {
4461 thread
= steal_processor_thread(processor
);
4462 if (thread
!= THREAD_NULL
) {
4463 remqueue((queue_entry_t
)processor
);
4464 enqueue_tail(&cset
->active_queue
, (queue_entry_t
)processor
);
4472 processor
= (processor_t
)queue_next((queue_entry_t
)processor
);
4475 nset
= next_pset(cset
);
4483 } while (nset
!= pset
);
4487 return (THREAD_NULL
);
4490 static thread_t
steal_thread_disabled(
4491 processor_set_t pset
)
4495 return (THREAD_NULL
);
4498 #endif /* CONFIG_SCHED_TRADITIONAL */
4502 sys_override_cpu_throttle(int flag
)
4504 if (flag
== CPU_THROTTLE_ENABLE
)
4505 cpu_throttle_enabled
= 1;
4506 if (flag
== CPU_THROTTLE_DISABLE
)
4507 cpu_throttle_enabled
= 0;
int
thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2)
{
    if (thread == NULL || (thread->state & TH_IDLE)) {
        *arg1 = 0;
        *arg2 = 0;

        return (THREAD_URGENCY_NONE);
    } else if (thread->sched_mode == TH_MODE_REALTIME) {
        *arg1 = thread->realtime.period;
        *arg2 = thread->realtime.deadline;

        return (THREAD_URGENCY_REAL_TIME);
    } else if (cpu_throttle_enabled &&
           ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->priority <= MAXPRI_THROTTLE))) {
        /*
         * Background urgency applied when thread priority is
         * MAXPRI_THROTTLE or lower and the thread is not promoted.
         * TODO: Use TH_SFLAG_THROTTLED instead?
         */
        *arg1 = thread->sched_pri;
        *arg2 = thread->priority;

        return (THREAD_URGENCY_BACKGROUND);
    } else {
        /* For otherwise unclassified threads, report throughput QoS
         * parameters.
         */
        *arg1 = thread->effective_policy.t_through_qos;
        *arg2 = thread->task->effective_policy.t_through_qos;

        return (THREAD_URGENCY_NORMAL);
    }
}
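/*
 * Illustrative sketch (not part of the original file): a hypothetical consumer
 * of thread_get_urgency(), e.g. a power-management policy, might map the
 * returned class and its two class-specific arguments onto a coarse bias.
 * The helper name (urgency_to_bias) and the bias values are invented for
 * illustration; only the THREAD_URGENCY_* classes come from this file.
 */
#if 0 /* example only -- never compiled */
static int
urgency_to_bias(thread_t thread)
{
    uint64_t arg1, arg2;

    switch (thread_get_urgency(thread, &arg1, &arg2)) {
    case THREAD_URGENCY_REAL_TIME:
        /* arg1/arg2 carry the realtime period and deadline */
        return 2;       /* favor performance */
    case THREAD_URGENCY_BACKGROUND:
        /* arg1/arg2 carry sched_pri and base priority */
        return -1;      /* favor efficiency */
    case THREAD_URGENCY_NONE:
        return 0;
    default:            /* THREAD_URGENCY_NORMAL: arg1/arg2 carry throughput QoS */
        return 1;
    }
}
#endif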
/*
 * processor_idle:
 *
 * This is the processor idle loop, which just looks for other threads
 * to execute. Processor idle threads invoke this without supplying a
 * current thread to idle without an asserted wait state.
 *
 * Returns the next thread to execute if dispatched directly.
 */

#if 0
#define IDLE_KERNEL_DEBUG_CONSTANT(...) KERNEL_DEBUG_CONSTANT(__VA_ARGS__)
#else
#define IDLE_KERNEL_DEBUG_CONSTANT(...) do { } while(0)
#endif
thread_t
processor_idle(
    thread_t            thread,
    processor_t         processor)
{
    processor_set_t     pset = processor->processor_set;
    thread_t            new_thread;
    int                 state;

    (void)splsched();

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START,
        (uintptr_t)thread_tid(thread), 0, 0, 0, 0);

    SCHED_STATS_CPU_IDLE_START(processor);

    timer_switch(&PROCESSOR_DATA(processor, system_state),
        mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
    PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);

    while (1) {
        if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */
            break;
        if (pset->pending_AST_cpu_mask & (1U << processor->cpu_id))
            break;
        if (rt_runq.count)
            break;

#if CONFIG_SCHED_IDLE_IN_PLACE
        if (thread != THREAD_NULL) {
            /* Did the idle-in-place thread wake up? */
            if ((thread->state & (TH_WAIT|TH_SUSP)) != TH_WAIT || thread->wake_active)
                break;
        }
#endif

        IDLE_KERNEL_DEBUG_CONSTANT(
            MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0);

        machine_track_platform_idle(TRUE);

        machine_idle();

        machine_track_platform_idle(FALSE);

        (void)splsched();

        IDLE_KERNEL_DEBUG_CONSTANT(
            MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -2, 0);

        if (!SCHED(processor_queue_empty)(processor)) {
            /* Secondary SMT processors respond to directed wakeups
             * exclusively. Some platforms induce 'spurious' SMT wakeups.
             */
            if (processor->processor_primary == processor)
                break;
        }
    }

    timer_switch(&PROCESSOR_DATA(processor, idle_state),
        mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
    PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);

    pset_lock(pset);

    /* If we were sent a remote AST and came out of idle, acknowledge it here with pset lock held */
    pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id);

    state = processor->state;
    if (state == PROCESSOR_DISPATCHING) {
        /*
         * Common case -- cpu dispatched.
         */
        new_thread = processor->next_thread;
        processor->next_thread = THREAD_NULL;
        processor->state = PROCESSOR_RUNNING;

        if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE) ||
            (rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri))) {
            /* Something higher priority has popped up on the runqueue - redispatch this thread elsewhere */
            processor->current_pri = IDLEPRI;
            processor->current_thmode = TH_MODE_FIXED;
            processor->current_sfi_class = SFI_CLASS_KERNEL;
            processor->deadline = UINT64_MAX;

            pset_unlock(pset);

            thread_lock(new_thread);
            KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REDISPATCH), (uintptr_t)thread_tid(new_thread), new_thread->sched_pri, rt_runq.count, 0, 0);
            thread_setrun(new_thread, SCHED_HEADQ);
            thread_unlock(new_thread);

            KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
                (uintptr_t)thread_tid(thread), state, 0, 0, 0);

            return (THREAD_NULL);
        }

        pset_unlock(pset);

        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
            (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);

        return (new_thread);
    } else if (state == PROCESSOR_IDLE) {
        remqueue((queue_entry_t)processor);

        processor->state = PROCESSOR_RUNNING;
        processor->current_pri = IDLEPRI;
        processor->current_thmode = TH_MODE_FIXED;
        processor->current_sfi_class = SFI_CLASS_KERNEL;
        processor->deadline = UINT64_MAX;
        enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
    } else if (state == PROCESSOR_SHUTDOWN) {
        /*
         * Going off-line. Force a
         * reschedule.
         */
        if ((new_thread = processor->next_thread) != THREAD_NULL) {
            processor->next_thread = THREAD_NULL;
            processor->current_pri = IDLEPRI;
            processor->current_thmode = TH_MODE_FIXED;
            processor->current_sfi_class = SFI_CLASS_KERNEL;
            processor->deadline = UINT64_MAX;

            pset_unlock(pset);

            thread_lock(new_thread);
            thread_setrun(new_thread, SCHED_HEADQ);
            thread_unlock(new_thread);

            KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
                (uintptr_t)thread_tid(thread), state, 0, 0, 0);

            return (THREAD_NULL);
        }
    }

    pset_unlock(pset);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
        (uintptr_t)thread_tid(thread), state, 0, 0, 0);

    return (THREAD_NULL);
}
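/*
 * Illustrative sketch (not part of the original file): the idle loop above
 * polls pset->pending_AST_cpu_mask for this CPU's bit and clears it, with the
 * pset lock held, once the wakeup has been consumed. A stand-alone analogue of
 * that "set a bit to poke a CPU, clear it when acknowledged" pattern, using C11
 * atomics and an invented mask variable, is guarded out below.
 */
#if 0 /* example only -- never compiled */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic uint32_t pending_cpu_mask;

static void
poke_cpu(int cpu_id)
{
    atomic_fetch_or(&pending_cpu_mask, 1U << cpu_id);
    /* ...then send the platform-specific wakeup (IPI, monitor write, ...) */
}

static bool
should_exit_idle(int cpu_id)
{
    return (atomic_load(&pending_cpu_mask) & (1U << cpu_id)) != 0;
}

static void
ack_wakeup(int cpu_id)
{
    /* the kernel clears its mask under the pset lock; plain atomics suffice here */
    atomic_fetch_and(&pending_cpu_mask, ~(1U << cpu_id));
}
#endif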
/*
 * Each processor has a dedicated thread which
 * executes the idle loop when there is no suitable
 * previous context.
 */
void
idle_thread(void)
{
    processor_t     processor = current_processor();
    thread_t        new_thread;

    new_thread = processor_idle(THREAD_NULL, processor);
    if (new_thread != THREAD_NULL) {
        thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
        /*NOTREACHED*/
    }

    thread_block((thread_continue_t)idle_thread);
    /*NOTREACHED*/
}
kern_return_t
idle_thread_create(
    processor_t     processor)
{
    kern_return_t   result;
    thread_t        thread;
    spl_t           s;

    result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
    if (result != KERN_SUCCESS)
        return (result);

    s = splsched();
    thread_lock(thread);
    thread->bound_processor = processor;
    processor->idle_thread = thread;
    thread->sched_pri = thread->priority = IDLEPRI;
    thread->state = (TH_RUN | TH_IDLE);
    thread->options |= TH_OPT_IDLE_THREAD;
    thread_unlock(thread);
    splx(s);

    thread_deallocate(thread);

    return (KERN_SUCCESS);
}
/*
 * sched_startup:
 *
 * Kicks off scheduler services.
 *
 * Called at splsched.
 */
void
sched_startup(void)
{
    kern_return_t   result;
    thread_t        thread;

    result = kernel_thread_start_priority((thread_continue_t)sched_init_thread,
        (void *)SCHED(maintenance_continuation), MAXPRI_KERNEL, &thread);
    if (result != KERN_SUCCESS)
        panic("sched_startup");

    thread_deallocate(thread);

    /*
     * Yield to the sched_init_thread once, to
     * initialize our own thread after being switched
     * back to.
     *
     * The current thread is the only other thread
     * active at this point.
     */
    thread_block(THREAD_CONTINUE_NULL);
}
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

static volatile uint64_t        sched_maintenance_deadline;
#if defined(CONFIG_TELEMETRY)
static volatile uint64_t        sched_telemetry_deadline = 0;
#endif
static uint64_t                 sched_tick_last_abstime;
static uint64_t                 sched_tick_delta;
uint64_t                        sched_tick_max_delta;
/*
 * sched_traditional_maintenance_continue:
 *
 * Perform periodic bookkeeping functions about ten
 * times per second.
 */
void
sched_traditional_maintenance_continue(void)
{
    uint64_t    sched_tick_ctime, late_time;

    sched_tick_ctime = mach_absolute_time();

    if (__improbable(sched_tick_last_abstime == 0)) {
        sched_tick_last_abstime = sched_tick_ctime;
        late_time = 0;
        sched_tick_delta = 1;
    } else {
        late_time = sched_tick_ctime - sched_tick_last_abstime;
        sched_tick_delta = late_time / sched_tick_interval;
        /* Ensure a delta of at least 1, since the elapsed interval could be
         * slightly smaller than the sched_tick_interval due to dispatch
         * latencies.
         */
        sched_tick_delta = MAX(sched_tick_delta, 1);

        /* In the event that interrupt latencies, or platform idle events that
         * advanced the timebase, resulted in periods where no threads were
         * dispatched, cap the maximum "tick delta" at SCHED_TICK_MAX_DELTA
         * iterations.
         */
        sched_tick_delta = MIN(sched_tick_delta, SCHED_TICK_MAX_DELTA);

        sched_tick_last_abstime = sched_tick_ctime;
        sched_tick_max_delta = MAX(sched_tick_delta, sched_tick_max_delta);
    }

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_START,
        sched_tick_delta, late_time, 0, 0, 0);

    /* Add a number of pseudo-ticks corresponding to the elapsed interval.
     * This could be greater than 1 if substantial intervals where
     * all processors are idle occur, which rarely happens in practice.
     */
    sched_tick += sched_tick_delta;

    /*
     * Compute various averages.
     */
    compute_averages(sched_tick_delta);

    /*
     * Scan the run queues for threads which
     * may need to be updated.
     */
    SCHED(thread_update_scan)();

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_END,
        sched_pri_shift, sched_background_pri_shift, 0, 0, 0);

    assert_wait((event_t)sched_traditional_maintenance_continue, THREAD_UNINT);
    thread_block((thread_continue_t)sched_traditional_maintenance_continue);
    /*NOTREACHED*/
}
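/*
 * Illustrative sketch (not part of the original file): the delta computed
 * above is simply the number of whole sched_tick_intervals that elapsed since
 * the last pass, clamped to [1, SCHED_TICK_MAX_DELTA]. For example, if the
 * maintenance thread runs 3.7 intervals late, late_time / sched_tick_interval
 * truncates to 3 and sched_tick advances by 3 pseudo-ticks. Stand-alone form:
 */
#if 0 /* example only -- never compiled */
#include <stdint.h>

static uint64_t
tick_delta(uint64_t late_time, uint64_t interval, uint64_t max_delta)
{
    uint64_t delta = late_time / interval;  /* whole intervals elapsed */

    if (delta < 1)
        delta = 1;                          /* dispatch latency can undershoot */
    if (delta > max_delta)
        delta = max_delta;                  /* cap long idle/timebase jumps */
    return delta;
}
#endif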
static uint64_t sched_maintenance_wakeups;

/*
 * Determine if the set of routines formerly driven by a maintenance timer
 * must be invoked, based on a deadline comparison. Signals the scheduler
 * maintenance thread on deadline expiration. Must be invoked at an interval
 * lower than the "sched_tick_interval", currently accomplished by
 * invocation via the quantum expiration timer and at context switch time.
 * Performance matters: this routine reuses a timestamp approximating the
 * current absolute time received from the caller, and should perform
 * no more than a comparison against the deadline in the common case.
 */
void
sched_traditional_consider_maintenance(uint64_t ctime) {
    uint64_t ndeadline, deadline = sched_maintenance_deadline;

    if (__improbable(ctime >= deadline)) {
        if (__improbable(current_thread() == sched_maintenance_thread))
            return;

        ndeadline = ctime + sched_tick_interval;

        if (__probable(__sync_bool_compare_and_swap(&sched_maintenance_deadline, deadline, ndeadline))) {
            thread_wakeup((event_t)sched_traditional_maintenance_continue);
            sched_maintenance_wakeups++;
        }
    }
#if defined(CONFIG_TELEMETRY)
    /*
     * Windowed telemetry is driven by the scheduler. It should be safe
     * to call compute_telemetry_windowed() even when windowed telemetry
     * is disabled, but we should try to avoid doing extra work for no
     * reason.
     */
    if (telemetry_window_enabled) {
        deadline = sched_telemetry_deadline;

        if (__improbable(ctime >= deadline)) {
            ndeadline = ctime + sched_telemetry_interval;

            if (__probable(__sync_bool_compare_and_swap(&sched_telemetry_deadline, deadline, ndeadline))) {
                compute_telemetry_windowed();
            }
        }
    }
#endif /* CONFIG_TELEMETRY */
}

#endif /* CONFIG_SCHED_TIMESHARE_CORE */
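/*
 * Illustrative sketch (not part of the original file): both deadline checks
 * above rely on a compare-and-swap so that, of all the CPUs that may observe
 * an expired deadline concurrently, exactly one re-arms it and performs the
 * work (waking the maintenance thread, or computing telemetry). A
 * self-contained version of that pattern with an invented callback:
 */
#if 0 /* example only -- never compiled */
#include <stdint.h>

static volatile uint64_t next_deadline;

static void
consider_periodic_work(uint64_t now, uint64_t interval, void (*work)(void))
{
    uint64_t deadline = next_deadline;

    if (now >= deadline &&
        __sync_bool_compare_and_swap(&next_deadline, deadline, now + interval)) {
        /* only the CAS winner gets here; losers saw a stale deadline */
        work();
    }
}
#endif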
void
sched_init_thread(void (*continuation)(void))
{
    thread_block(THREAD_CONTINUE_NULL);

    sched_maintenance_thread = current_thread();
    continuation();

    /*NOTREACHED*/
}
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

/*
 * thread_update_scan / runq_scan:
 *
 * Scan the run queues to account for timesharing threads
 * which need to be updated.
 *
 * Scanner runs in two passes. Pass one squirrels likely
 * threads away in an array, pass two does the update.
 *
 * This is necessary because the run queue is locked for
 * the candidate scan, but the thread is locked for the update.
 *
 * Array should be sized to make forward progress, without
 * disabling preemption for long periods.
 */
#define THREAD_UPDATE_SIZE      128

static thread_t     thread_update_array[THREAD_UPDATE_SIZE];
static int          thread_update_count = 0;

/* Returns TRUE if thread was added, FALSE if thread_update_array is full */
static boolean_t
thread_update_add_thread(thread_t thread)
{
    if (thread_update_count == THREAD_UPDATE_SIZE)
        return (FALSE);

    thread_update_array[thread_update_count++] = thread;
    thread_reference_internal(thread);
    return (TRUE);
}
static void
thread_update_process_threads(void)
{
    while (thread_update_count > 0) {
        spl_t s;
        thread_t thread = thread_update_array[--thread_update_count];
        thread_update_array[thread_update_count] = THREAD_NULL;

        s = splsched();
        thread_lock(thread);
        if (!(thread->state & (TH_WAIT)) && (SCHED(can_update_priority)(thread))) {
            SCHED(update_priority)(thread);
        }
        thread_unlock(thread);
        splx(s);

        thread_deallocate(thread);
    }
}
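/*
 * Illustrative sketch (not part of the original file): the pair of routines
 * above implements a two-pass scan -- candidates are collected into a bounded
 * array while the run-queue lock is held, then updated one at a time under the
 * thread lock, with a retry if the array fills. A generic analogue using
 * invented names (collect_candidates / update_one) is guarded out below.
 */
#if 0 /* example only -- never compiled */
#include <stdbool.h>
#include <stddef.h>

#define BATCH   128

struct obj;
extern bool collect_candidates(struct obj **batch, size_t cap, size_t *n);
extern void update_one(struct obj *o);

static void
two_pass_update(void)
{
    struct obj *batch[BATCH];
    size_t      n;
    bool        more;

    do {
        /* pass 1: gather under the container's lock (inside the callee) */
        more = collect_candidates(batch, BATCH, &n);

        /* pass 2: take each object's own lock (inside the callee) */
        for (size_t i = 0; i < n; i++)
            update_one(batch[i]);
    } while (more);     /* array filled up -- rescan for the rest */
}
#endif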
/*
 * Scan a runq for candidate threads.
 *
 * Returns TRUE if retry is needed.
 */
static boolean_t
runq_scan(
    run_queue_t         runq)
{
    register int        count;
    register queue_t    q;
    register thread_t   thread;

    if ((count = runq->count) > 0) {
        q = runq->queues + runq->highq;
        while (count > 0) {
            queue_iterate(q, thread, thread_t, links) {
                if (thread->sched_stamp != sched_tick &&
                    (thread->sched_mode == TH_MODE_TIMESHARE)) {
                    if (thread_update_add_thread(thread) == FALSE)
                        return (TRUE);
                }

                count--;
            }

            q--;
        }
    }

    return (FALSE);
}

#endif /* CONFIG_SCHED_TIMESHARE_CORE */
#if defined(CONFIG_SCHED_TRADITIONAL)

static void
thread_update_scan(void)
{
    boolean_t           restart_needed = FALSE;
    processor_t         processor = processor_list;
    processor_set_t     pset;
    thread_t            thread;
    spl_t               s;

    do {
        do {
            /*
             * TODO: in the sched_traditional_use_pset_runqueue case,
             * avoid scanning the same runq multiple times
             */
            pset = processor->processor_set;

            s = splsched();
            pset_lock(pset);

            restart_needed = runq_scan(runq_for_processor(processor));

            pset_unlock(pset);
            splx(s);

            if (restart_needed)
                break;

            thread = processor->idle_thread;
            if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
                if (thread_update_add_thread(thread) == FALSE) {
                    restart_needed = TRUE;
                    break;
                }
            }
        } while ((processor = processor->processor_list) != NULL);

        /* Ok, we now have a collection of candidates -- fix them. */
        thread_update_process_threads();
    } while (restart_needed);
}

#endif /* CONFIG_SCHED_TRADITIONAL */
boolean_t
thread_eager_preemption(thread_t thread)
{
    return ((thread->sched_flags & TH_SFLAG_EAGERPREEMPT) != 0);
}

void
thread_set_eager_preempt(thread_t thread)
{
    spl_t x;
    processor_t p;
    ast_t ast = AST_NONE;

    x = splsched();
    p = current_processor();

    thread_lock(thread);
    thread->sched_flags |= TH_SFLAG_EAGERPREEMPT;

    if (thread == current_thread()) {
        ast = csw_check(p, AST_NONE);
        thread_unlock(thread);
        if (ast != AST_NONE) {
            (void) thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
        }
    } else {
        p = thread->last_processor;

        if (p != PROCESSOR_NULL && p->state == PROCESSOR_RUNNING &&
            p->active_thread == thread) {
            cause_ast_check(p);
        }

        thread_unlock(thread);
    }

    splx(x);
}

void
thread_clear_eager_preempt(thread_t thread)
{
    spl_t x;

    x = splsched();
    thread_lock(thread);

    thread->sched_flags &= ~TH_SFLAG_EAGERPREEMPT;

    thread_unlock(thread);
    splx(x);
}
/*
 * Scheduling statistics
 */
void
sched_stats_handle_csw(processor_t processor, int reasons, int selfpri, int otherpri)
{
    struct processor_sched_statistics *stats;
    boolean_t to_realtime = FALSE;

    stats = &processor->processor_data.sched_stats;
    stats->csw_count++;

    if (otherpri >= BASEPRI_REALTIME) {
        stats->rt_sched_count++;
        to_realtime = TRUE;
    }

    if ((reasons & AST_PREEMPT) != 0) {
        stats->preempt_count++;

        if (selfpri >= BASEPRI_REALTIME) {
            stats->preempted_rt_count++;
        }

        if (to_realtime) {
            stats->preempted_by_rt_count++;
        }
    }
}

void
sched_stats_handle_runq_change(struct runq_stats *stats, int old_count)
{
    uint64_t timestamp = mach_absolute_time();

    stats->count_sum += (timestamp - stats->last_change_timestamp) * old_count;
    stats->last_change_timestamp = timestamp;
}
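/*
 * Illustrative sketch (not part of the original file): count_sum above is a
 * time-weighted integral of the run-queue depth -- each change adds
 * (time spent at the old depth) * (old depth), so dividing by the total
 * observed time yields the average depth. Stand-alone form with an invented
 * clock accessor:
 */
#if 0 /* example only -- never compiled */
#include <stdint.h>

struct depth_stats {
    uint64_t count_sum;             /* sum of depth * dwell-time */
    uint64_t last_change;           /* timestamp of the last depth change */
    uint64_t start;                 /* timestamp when tracking began */
};

extern uint64_t now(void);          /* hypothetical clock source */

static void
depth_changed(struct depth_stats *s, uint64_t old_depth)
{
    uint64_t t = now();

    s->count_sum += (t - s->last_change) * old_depth;
    s->last_change = t;
}

static uint64_t
average_depth(const struct depth_stats *s)
{
    uint64_t elapsed = s->last_change - s->start;

    return elapsed ? s->count_sum / elapsed : 0;
}
#endif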
/*
 * For calls from assembly code
 */
#undef thread_wakeup
void
thread_wakeup(
    event_t     x);

void
thread_wakeup(
    event_t     x)
{
    thread_wakeup_with_result(x, THREAD_AWAKENED);
}

boolean_t
preemption_enabled(void)
{
    return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
}

__assert_only static boolean_t
thread_runnable(
    thread_t    thread)
{
    return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
}

static void
sched_timer_deadline_tracking_init(void) {
    nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1);
    nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2);
}