apple/xnu.git (xnu-3789.70.16): osfmk/kern/sched_prim.c
1c79356b 1/*
39037602 2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55 6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55 15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: sched_prim.c
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1986
62 *
63 * Scheduling primitives
64 *
65 */
66
67#include <debug.h>
91447636 68
69#include <mach/mach_types.h>
1c79356b 70#include <mach/machine.h>
91447636 71#include <mach/policy.h>
72#include <mach/sync_policy.h>
6d2010ae 73#include <mach/thread_act.h>
91447636 74
1c79356b 75#include <machine/machine_routines.h>
76#include <machine/sched_param.h>
0c530ab8 77#include <machine/machine_cpu.h>
6d2010ae 78#include <machine/machlimits.h>
91447636 79
fe8ab488 80#ifdef CONFIG_MACH_APPROXIMATE_TIME
81#include <machine/commpage.h>
82#endif
83
91447636 84#include <kern/kern_types.h>
39037602 85#include <kern/backtrace.h>
1c79356b 86#include <kern/clock.h>
87#include <kern/counters.h>
88#include <kern/cpu_number.h>
89#include <kern/cpu_data.h>
3e170ce0 90#include <kern/smp.h>
91447636 91#include <kern/debug.h>
1c79356b 92#include <kern/macro_help.h>
93#include <kern/machine.h>
94#include <kern/misc_protos.h>
95#include <kern/processor.h>
96#include <kern/queue.h>
97#include <kern/sched.h>
98#include <kern/sched_prim.h>
fe8ab488 99#include <kern/sfi.h>
1c79356b 100#include <kern/syscall_subr.h>
101#include <kern/task.h>
102#include <kern/thread.h>
316670eb 103#include <kern/ledger.h>
39236c6e 104#include <kern/timer_queue.h>
3e170ce0 105#include <kern/waitq.h>
39037602 106#include <kern/policy_internal.h>
91447636 107
1c79356b 108#include <vm/pmap.h>
109#include <vm/vm_kern.h>
110#include <vm/vm_map.h>
91447636 111
b0d623f7 112#include <mach/sdt.h>
113
1c79356b 114#include <sys/kdebug.h>
39037602 115#include <kperf/kperf.h>
116#include <kern/kpc.h>
1c79356b 117
0c530ab8 118#include <kern/pms.h>
3a60a9f5 119
6d2010ae 120struct rt_queue rt_runq;
2d21ac55 121
3e170ce0 122uintptr_t sched_thread_on_rt_queue = (uintptr_t)0xDEAFBEE0;
123
124/* Lock RT runq, must be done with interrupts disabled (under splsched()) */
125#if __SMP__
126decl_simple_lock_data(static,rt_lock);
127#define rt_lock_init() simple_lock_init(&rt_lock, 0)
128#define rt_lock_lock() simple_lock(&rt_lock)
129#define rt_lock_unlock() simple_unlock(&rt_lock)
130#else
131#define rt_lock_init() do { } while(0)
132#define rt_lock_lock() do { } while(0)
133#define rt_lock_unlock() do { } while(0)
134#endif
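/*
 * Illustrative usage sketch (not part of the original file): per the comment
 * above, the RT runq lock is only taken with interrupts disabled, bracketed
 * by splsched()/splx().
 */
#if 0	/* example only */
	spl_t s = splsched();		/* raise to scheduling level, interrupts off */
	rt_lock_lock();			/* take the RT runq lock (no-op on !__SMP__) */
	/* ... inspect or modify rt_runq here ... */
	rt_lock_unlock();
	splx(s);			/* restore the previous interrupt level */
#endif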
6d2010ae 135
0b4e3aa0 136#define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */
1c79356b 137int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
138
316670eb 139#define DEFAULT_BG_PREEMPTION_RATE 400 /* (1/s) */
140int default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
141
0b4e3aa0 142#define MAX_UNSAFE_QUANTA 800
143int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
144
145#define MAX_POLL_QUANTA 2
146int max_poll_quanta = MAX_POLL_QUANTA;
147
148#define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */
149int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
150
55e303ae 151uint64_t max_poll_computation;
152
6d2010ae 153uint64_t max_unsafe_computation;
154uint64_t sched_safe_duration;
155
fe8ab488 156#if defined(CONFIG_SCHED_TIMESHARE_CORE)
6d2010ae 157
55e303ae 158uint32_t std_quantum;
159uint32_t min_std_quantum;
316670eb 160uint32_t bg_quantum;
55e303ae 161
91447636 162uint32_t std_quantum_us;
316670eb 163uint32_t bg_quantum_us;
91447636 164
fe8ab488 165#endif /* CONFIG_SCHED_TIMESHARE_CORE */
6d2010ae
A
166
167uint32_t thread_depress_time;
168uint32_t default_timeshare_computation;
169uint32_t default_timeshare_constraint;
170
55e303ae 171uint32_t max_rt_quantum;
172uint32_t min_rt_quantum;
173
fe8ab488 174#if defined(CONFIG_SCHED_TIMESHARE_CORE)
6d2010ae 175
1c79356b 176unsigned sched_tick;
91447636 177uint32_t sched_tick_interval;
1c79356b 178
39037602 179uint32_t sched_pri_shifts[TH_BUCKET_MAX];
2d21ac55 180uint32_t sched_fixed_shift;
39236c6e 181
182uint32_t sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */
2d21ac55 183
fe8ab488 184/* Allow foreground to decay past default to resolve inversions */
185#define DEFAULT_DECAY_BAND_LIMIT ((BASEPRI_FOREGROUND - BASEPRI_DEFAULT) + 2)
186int sched_pri_decay_band_limit = DEFAULT_DECAY_BAND_LIMIT;
6d2010ae 187
4b17d6b6 188/* Defaults for timer deadline profiling */
189#define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT 2000000 /* Timers with deadlines <=
39236c6e 190 * 2ms */
4b17d6b6 191#define TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT 5000000 /* Timers with deadlines
39236c6e 192 <= 5ms */
193
4b17d6b6 194uint64_t timer_deadline_tracking_bin_1;
195uint64_t timer_deadline_tracking_bin_2;
196
490019cf 197#endif /* CONFIG_SCHED_TIMESHARE_CORE */
198
39236c6e 199thread_t sched_maintenance_thread;
200
fe8ab488 201
6d2010ae 202uint64_t sched_one_second_interval;
203
1c79356b 204/* Forwards */
6d2010ae 205
fe8ab488 206#if defined(CONFIG_SCHED_TIMESHARE_CORE)
6d2010ae 207
39236c6e 208static void load_shift_init(void);
209static void preempt_pri_init(void);
2d21ac55 210
fe8ab488 211#endif /* CONFIG_SCHED_TIMESHARE_CORE */
c910b4d9 212
6d2010ae 213static thread_t thread_select(
214 thread_t thread,
fe8ab488 215 processor_t processor,
216 ast_t reason);
b0d623f7 217
6d2010ae 218#if CONFIG_SCHED_IDLE_IN_PLACE
2d21ac55 219static thread_t thread_select_idle(
220 thread_t thread,
221 processor_t processor);
6d2010ae 222#endif
1c79356b 223
6d2010ae 224thread_t processor_idle(
2d21ac55 225 thread_t thread,
226 processor_t processor);
91447636 227
39236c6e 228ast_t
229csw_check_locked( processor_t processor,
fe8ab488 230 processor_set_t pset,
231 ast_t check_reason);
39236c6e 232
6d2010ae 233static void processor_setrun(
234 processor_t processor,
235 thread_t thread,
236 integer_t options);
237
6d2010ae 238static void
39236c6e 239sched_realtime_init(void);
6d2010ae 240
241static void
242sched_realtime_timebase_init(void);
243
4b17d6b6 244static void
245sched_timer_deadline_tracking_init(void);
246
2d21ac55 247#if DEBUG
248extern int debug_task;
249#define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
250#else
251#define TLOG(a, fmt, args...) do {} while (0)
252#endif
253
3e170ce0 254static processor_t
255thread_bind_internal(
256 thread_t thread,
257 processor_t processor);
1c79356b 258
3e170ce0 259static void
260sched_vm_group_maintenance(void);
1c79356b 261
fe8ab488 262#if defined(CONFIG_SCHED_TIMESHARE_CORE)
91447636 263int8_t sched_load_shifts[NRQS];
39037602 264bitmap_t sched_preempt_pri[BITMAP_LEN(NRQS)];
fe8ab488 265#endif /* CONFIG_SCHED_TIMESHARE_CORE */
6d2010ae 266
6d2010ae 267const struct sched_dispatch_table *sched_current_dispatch = NULL;
268
269/*
270 * Statically allocate a buffer to hold the longest possible
271 * scheduler description string, as currently implemented.
272 * bsd/kern/kern_sysctl.c has a corresponding definition in bsd/
273 * to export to userspace via sysctl(3). If either version
274 * changes, update the other.
275 *
276 * Note that in addition to being an upper bound on the strings
277 * in the kernel, it's also an exact parameter to PE_get_default(),
278 * which interrogates the device tree on some platforms. That
279 * API requires the caller know the exact size of the device tree
280 * property, so we need both a legacy size (32) and the current size
281 * (48) to deal with old and new device trees. The device tree property
282 * is similarly padded to a fixed size so that the same kernel image
283 * can run on multiple devices with different schedulers configured
284 * in the device tree.
285 */
6d2010ae 286char sched_string[SCHED_STRING_MAX_LENGTH];
3e170ce0 287
288uint32_t sched_debug_flags;
39236c6e 289
290/* Global flag which indicates whether Background Stepper Context is enabled */
291static int cpu_throttle_enabled = 1;
91447636 292
1c79356b 293void
294sched_init(void)
6d2010ae
A
295{
296 char sched_arg[SCHED_STRING_MAX_LENGTH] = { '\0' };
297
298 /* Check for runtime selection of the scheduler algorithm */
299 if (!PE_parse_boot_argn("sched", sched_arg, sizeof (sched_arg))) {
300 /* If no boot-args override, look in device tree */
301 if (!PE_get_default("kern.sched", sched_arg,
302 SCHED_STRING_MAX_LENGTH)) {
303 sched_arg[0] = '\0';
304 }
305 }
306
fe8ab488
A
307
308 if (!PE_parse_boot_argn("sched_pri_decay_limit", &sched_pri_decay_band_limit, sizeof(sched_pri_decay_band_limit))) {
309 /* No boot-args, check in device tree */
310 if (!PE_get_default("kern.sched_pri_decay_limit",
311 &sched_pri_decay_band_limit,
312 sizeof(sched_pri_decay_band_limit))) {
313 /* Allow decay all the way to normal limits */
314 sched_pri_decay_band_limit = DEFAULT_DECAY_BAND_LIMIT;
315 }
316 }
317
318 kprintf("Setting scheduler priority decay band limit %d\n", sched_pri_decay_band_limit);
319
6d2010ae
A
320 if (strlen(sched_arg) > 0) {
321 if (0) {
322 /* Allow pattern below */
323#if defined(CONFIG_SCHED_TRADITIONAL)
3e170ce0 324 } else if (0 == strcmp(sched_arg, sched_traditional_dispatch.sched_name)) {
6d2010ae 325 sched_current_dispatch = &sched_traditional_dispatch;
3e170ce0 326 } else if (0 == strcmp(sched_arg, sched_traditional_with_pset_runqueue_dispatch.sched_name)) {
6d2010ae 327 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
6d2010ae
A
328#endif
329#if defined(CONFIG_SCHED_PROTO)
3e170ce0 330 } else if (0 == strcmp(sched_arg, sched_proto_dispatch.sched_name)) {
6d2010ae 331 sched_current_dispatch = &sched_proto_dispatch;
6d2010ae
A
332#endif
333#if defined(CONFIG_SCHED_GRRR)
3e170ce0 334 } else if (0 == strcmp(sched_arg, sched_grrr_dispatch.sched_name)) {
6d2010ae 335 sched_current_dispatch = &sched_grrr_dispatch;
6d2010ae 336#endif
fe8ab488 337#if defined(CONFIG_SCHED_MULTIQ)
3e170ce0 338 } else if (0 == strcmp(sched_arg, sched_multiq_dispatch.sched_name)) {
fe8ab488 339 sched_current_dispatch = &sched_multiq_dispatch;
3e170ce0 340 } else if (0 == strcmp(sched_arg, sched_dualq_dispatch.sched_name)) {
fe8ab488 341 sched_current_dispatch = &sched_dualq_dispatch;
6d2010ae
A
342#endif
343 } else {
fe8ab488
A
344#if defined(CONFIG_SCHED_TRADITIONAL)
345 printf("Unrecognized scheduler algorithm: %s\n", sched_arg);
3e170ce0 346 printf("Scheduler: Using instead: %s\n", sched_traditional_with_pset_runqueue_dispatch.sched_name);
fe8ab488 347 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
fe8ab488 348#else
6d2010ae 349 panic("Unrecognized scheduler algorithm: %s", sched_arg);
fe8ab488 350#endif
6d2010ae 351 }
3e170ce0 352 kprintf("Scheduler: Runtime selection of %s\n", SCHED(sched_name));
6d2010ae 353 } else {
fe8ab488
A
354#if defined(CONFIG_SCHED_MULTIQ)
355 sched_current_dispatch = &sched_multiq_dispatch;
fe8ab488 356#elif defined(CONFIG_SCHED_TRADITIONAL)
39236c6e 357 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
6d2010ae
A
358#elif defined(CONFIG_SCHED_PROTO)
359 sched_current_dispatch = &sched_proto_dispatch;
6d2010ae
A
360#elif defined(CONFIG_SCHED_GRRR)
361 sched_current_dispatch = &sched_grrr_dispatch;
6d2010ae
A
362#else
363#error No default scheduler implementation
364#endif
3e170ce0
A
365 kprintf("Scheduler: Default of %s\n", SCHED(sched_name));
366 }
367
368 strlcpy(sched_string, SCHED(sched_name), sizeof(sched_string));
369
370 if (PE_parse_boot_argn("sched_debug", &sched_debug_flags, sizeof(sched_debug_flags))) {
371 kprintf("Scheduler: Debug flags 0x%08x\n", sched_debug_flags);
6d2010ae
A
372 }
373
374 SCHED(init)();
6d2010ae
A
375 sched_realtime_init();
376 ast_init();
4b17d6b6 377 sched_timer_deadline_tracking_init();
39236c6e 378
6d2010ae
A
379 SCHED(pset_init)(&pset0);
380 SCHED(processor_init)(master_processor);
381}
382
383void
384sched_timebase_init(void)
385{
386 uint64_t abstime;
387
388 clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC, &abstime);
389 sched_one_second_interval = abstime;
390
391 SCHED(timebase_init)();
392 sched_realtime_timebase_init();
393}
394
fe8ab488 395#if defined(CONFIG_SCHED_TIMESHARE_CORE)
6d2010ae 396
fe8ab488 397void
3e170ce0 398sched_timeshare_init(void)
1c79356b 399{
400 /*
0b4e3aa0 401 * Calculate the timeslicing quantum
402 * in us.
1c79356b 403 */
404 if (default_preemption_rate < 1)
405 default_preemption_rate = DEFAULT_PREEMPTION_RATE;
0b4e3aa0 406 std_quantum_us = (1000 * 1000) / default_preemption_rate;
1c79356b 407
0b4e3aa0 408 printf("standard timeslicing quantum is %d us\n", std_quantum_us);
1c79356b 409
316670eb 410 if (default_bg_preemption_rate < 1)
411 default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
412 bg_quantum_us = (1000 * 1000) / default_bg_preemption_rate;
413
414 printf("standard background quantum is %d us\n", bg_quantum_us);
415
91447636 416 load_shift_init();
4a3eedf9 417 preempt_pri_init();
1c79356b 418 sched_tick = 0;
1c79356b 419}
420
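/*
 * Worked example (illustrative, using the defaults defined above): with
 * default_preemption_rate = 100 and default_bg_preemption_rate = 400,
 *
 *	std_quantum_us = 1000000 / 100 = 10000 us  (10 ms timeslice)
 *	bg_quantum_us  = 1000000 / 400 =  2500 us  (2.5 ms background timeslice)
 */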
fe8ab488 421void
3e170ce0 422sched_timeshare_timebase_init(void)
55e303ae 423{
91447636
A
424 uint64_t abstime;
425 uint32_t shift;
55e303ae 426
91447636 427 /* standard timeslicing quantum */
55e303ae
A
428 clock_interval_to_absolutetime_interval(
429 std_quantum_us, NSEC_PER_USEC, &abstime);
430 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
b0d623f7 431 std_quantum = (uint32_t)abstime;
55e303ae 432
91447636 433 /* smallest remaining quantum (250 us) */
55e303ae
A
434 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
435 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
b0d623f7 436 min_std_quantum = (uint32_t)abstime;
55e303ae 437
316670eb
A
438 /* quantum for background tasks */
439 clock_interval_to_absolutetime_interval(
440 bg_quantum_us, NSEC_PER_USEC, &abstime);
441 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
442 bg_quantum = (uint32_t)abstime;
443
91447636
A
444 /* scheduler tick interval */
445 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
446 NSEC_PER_USEC, &abstime);
cf7d32b8 447 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
b0d623f7 448 sched_tick_interval = (uint32_t)abstime;
55e303ae 449
91447636
A
450 /*
451 * Compute conversion factor from usage to
452 * timesharing priorities with 5/8 ** n aging.
453 */
454 abstime = (abstime * 5) / 3;
455 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
456 abstime >>= 1;
2d21ac55 457 sched_fixed_shift = shift;
91447636 458
39037602
A
459 for (uint32_t i = 0 ; i < TH_BUCKET_MAX ; i++)
460 sched_pri_shifts[i] = INT8_MAX;
461
fe8ab488
A
462 max_unsafe_computation = ((uint64_t)max_unsafe_quanta) * std_quantum;
463 sched_safe_duration = 2 * ((uint64_t)max_unsafe_quanta) * std_quantum;
39037602 464
fe8ab488 465 max_poll_computation = ((uint64_t)max_poll_quanta) * std_quantum;
6d2010ae
A
466 thread_depress_time = 1 * std_quantum;
467 default_timeshare_computation = std_quantum / 2;
468 default_timeshare_constraint = std_quantum;
469
470}
471
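/*
 * Worked numbers (illustrative; assumes SCHED_TICK_SHIFT == 3 and the 10 ms
 * std_quantum derived from the defaults above): the scheduler tick interval
 * is 1000000 >> 3 = 125000 us (125 ms), min_std_quantum is 250 us, and with
 * max_unsafe_quanta = 800, sched_safe_duration spans 2 * 800 * 10 ms = 16 s.
 */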
fe8ab488 472#endif /* CONFIG_SCHED_TIMESHARE_CORE */
473
6d2010ae 474static void
475sched_realtime_init(void)
476{
3e170ce0 477 rt_lock_init();
6d2010ae 478
479 rt_runq.count = 0;
480 queue_init(&rt_runq.queue);
55e303ae 481}
482
6d2010ae 483static void
484sched_realtime_timebase_init(void)
485{
486 uint64_t abstime;
487
 488 /* smallest rt computation (50 us) */
489 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
490 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
491 min_rt_quantum = (uint32_t)abstime;
492
493 /* maximum rt computation (50 ms) */
494 clock_interval_to_absolutetime_interval(
495 50, 1000*NSEC_PER_USEC, &abstime);
496 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
497 max_rt_quantum = (uint32_t)abstime;
498
499}
500
fe8ab488 501#if defined(CONFIG_SCHED_TIMESHARE_CORE)
6d2010ae 502
91447636
A
503/*
504 * Set up values for timeshare
505 * loading factors.
506 */
507static void
508load_shift_init(void)
509{
510 int8_t k, *p = sched_load_shifts;
511 uint32_t i, j;
512
39236c6e
A
513 uint32_t sched_decay_penalty = 1;
514
515 if (PE_parse_boot_argn("sched_decay_penalty", &sched_decay_penalty, sizeof (sched_decay_penalty))) {
516 kprintf("Overriding scheduler decay penalty %u\n", sched_decay_penalty);
517 }
518
519 if (PE_parse_boot_argn("sched_decay_usage_age_factor", &sched_decay_usage_age_factor, sizeof (sched_decay_usage_age_factor))) {
520 kprintf("Overriding scheduler decay usage age factor %u\n", sched_decay_usage_age_factor);
521 }
522
39236c6e
A
523 if (sched_decay_penalty == 0) {
524 /*
525 * There is no penalty for timeshare threads for using too much
526 * CPU, so set all load shifts to INT8_MIN. Even under high load,
527 * sched_pri_shift will be >INT8_MAX, and there will be no
528 * penalty applied to threads (nor will sched_usage be updated per
529 * thread).
530 */
531 for (i = 0; i < NRQS; i++) {
532 sched_load_shifts[i] = INT8_MIN;
533 }
534
535 return;
536 }
537
91447636
A
538 *p++ = INT8_MIN; *p++ = 0;
539
39236c6e
A
540 /*
541 * For a given system load "i", the per-thread priority
542 * penalty per quantum of CPU usage is ~2^k priority
543 * levels. "sched_decay_penalty" can cause more
544 * array entries to be filled with smaller "k" values
545 */
546 for (i = 2, j = 1 << sched_decay_penalty, k = 1; i < NRQS; ++k) {
547 for (j <<= 1; (i < j) && (i < NRQS); ++i)
91447636 548 *p++ = k;
549 }
550}
551
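/*
 * Illustrative, self-contained sketch (not part of xnu): a userspace model of
 * the table load_shift_init() builds, assuming NRQS == 128 and the default
 * sched_decay_penalty of 1. For a runnable load "i", the entry is the shift
 * "k" described in the comment above (a penalty of roughly 2^k priority
 * levels per quantum of CPU usage).
 */
#include <stdio.h>
#include <stdint.h>

#define NRQS 128	/* assumption: matches the kernel's run-queue depth */

int main(void)
{
	int8_t shifts[NRQS];
	int8_t k, *p = shifts;
	uint32_t i, j, sched_decay_penalty = 1;

	*p++ = INT8_MIN; *p++ = 0;	/* load 0: INT8_MIN sentinel; load 1: shift of 0 */

	for (i = 2, j = 1 << sched_decay_penalty, k = 1; i < NRQS; ++k) {
		for (j <<= 1; (i < j) && (i < NRQS); ++i)
			*p++ = k;	/* same fill loop as load_shift_init() above */
	}

	for (i = 0; i < 16; i++)	/* print the low end of the table */
		printf("load %2u -> shift %d\n", i, shifts[i]);
	return 0;
}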
4a3eedf9 552static void
553preempt_pri_init(void)
554{
39037602 555 bitmap_t *p = sched_preempt_pri;
4a3eedf9 556
39037602
A
557 for (int i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i)
558 bitmap_set(p, i);
4a3eedf9 559
39037602
A
560 for (int i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
561 bitmap_set(p, i);
4a3eedf9
A
562}
563
fe8ab488 564#endif /* CONFIG_SCHED_TIMESHARE_CORE */
6d2010ae 565
1c79356b 566/*
0b4e3aa0 567 * Thread wait timer expiration.
1c79356b
A
568 */
569void
570thread_timer_expire(
91447636
A
571 void *p0,
572 __unused void *p1)
1c79356b
A
573{
574 thread_t thread = p0;
575 spl_t s;
576
39037602
A
577 assert_thread_magic(thread);
578
1c79356b 579 s = splsched();
55e303ae 580 thread_lock(thread);
91447636 581 if (--thread->wait_timer_active == 0) {
0b4e3aa0
A
582 if (thread->wait_timer_is_set) {
583 thread->wait_timer_is_set = FALSE;
55e303ae 584 clear_wait_internal(thread, THREAD_TIMED_OUT);
0b4e3aa0 585 }
1c79356b 586 }
55e303ae 587 thread_unlock(thread);
1c79356b
A
588 splx(s);
589}
590
1c79356b 591/*
91447636
A
592 * thread_unblock:
593 *
594 * Unblock thread on wake up.
595 *
3e170ce0 596 * Returns TRUE if the thread should now be placed on the runqueue.
91447636
A
597 *
598 * Thread must be locked.
3e170ce0
A
599 *
600 * Called at splsched().
1c79356b 601 */
91447636
A
602boolean_t
603thread_unblock(
604 thread_t thread,
605 wait_result_t wresult)
1c79356b 606{
3e170ce0 607 boolean_t ready_for_runq = FALSE;
4b17d6b6 608 thread_t cthread = current_thread();
fe8ab488 609 uint32_t new_run_count;
0b4e3aa0 610
91447636 611 /*
2d21ac55 612 * Set wait_result.
91447636
A
613 */
614 thread->wait_result = wresult;
1c79356b 615
91447636 616 /*
2d21ac55 617 * Cancel pending wait timer.
91447636 618 */
1c79356b
A
619 if (thread->wait_timer_is_set) {
620 if (timer_call_cancel(&thread->wait_timer))
621 thread->wait_timer_active--;
622 thread->wait_timer_is_set = FALSE;
623 }
624
91447636 625 /*
2d21ac55
A
626 * Update scheduling state: not waiting,
627 * set running.
91447636
A
628 */
629 thread->state &= ~(TH_WAIT|TH_UNINT);
1c79356b 630
91447636
A
631 if (!(thread->state & TH_RUN)) {
632 thread->state |= TH_RUN;
3e170ce0
A
633 thread->last_made_runnable_time = mach_approximate_time();
634
635 ready_for_runq = TRUE;
1c79356b 636
2d21ac55 637 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
1c79356b 638
39037602 639 /* Update the runnable thread count */
fe8ab488 640 new_run_count = sched_run_incr(thread);
3e170ce0 641 } else {
2d21ac55 642 /*
39037602
A
643 * Either the thread is idling in place on another processor,
644 * or it hasn't finished context switching yet.
2d21ac55 645 */
6d2010ae 646#if CONFIG_SCHED_IDLE_IN_PLACE
2d21ac55
A
647 if (thread->state & TH_IDLE) {
648 processor_t processor = thread->last_processor;
649
650 if (processor != current_processor())
651 machine_signal_idle(processor);
652 }
6d2010ae
A
653#else
654 assert((thread->state & TH_IDLE) == 0);
655#endif
39037602
A
656 /*
657 * The run count is only dropped after the context switch completes
658 * and the thread is still waiting, so we should not run_incr here
659 */
660 new_run_count = sched_run_buckets[TH_BUCKET_RUN];
2d21ac55 661 }
1c79356b 662
3e170ce0 663
91447636
A
664 /*
665 * Calculate deadline for real-time threads.
666 */
6d2010ae 667 if (thread->sched_mode == TH_MODE_REALTIME) {
3e170ce0 668 uint64_t ctime;
fe8ab488
A
669
670 ctime = mach_absolute_time();
671 thread->realtime.deadline = thread->realtime.constraint + ctime;
0b4e3aa0
A
672 }
673
91447636
A
674 /*
675 * Clear old quantum, fail-safe computation, etc.
676 */
fe8ab488 677 thread->quantum_remaining = 0;
91447636
A
678 thread->computation_metered = 0;
679 thread->reason = AST_NONE;
813fb2f6 680 thread->block_hint = kThreadWaitNone;
1c79356b 681
4b17d6b6 682 /* Obtain power-relevant interrupt and "platform-idle exit" statistics.
683 * We also account for "double hop" thread signaling via
684 * the thread callout infrastructure.
 685 * DRK: consider removing the callout wakeup counters in the future;
686 * they're present for verification at the moment.
687 */
688 boolean_t aticontext, pidle;
689 ml_get_power_state(&aticontext, &pidle);
39236c6e
A
690
691 if (__improbable(aticontext && !(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT))) {
4b17d6b6 692 ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1);
39236c6e
A
693 DTRACE_SCHED2(iwakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
694
4b17d6b6 695 uint64_t ttd = PROCESSOR_DATA(current_processor(), timer_call_ttd);
39236c6e 696
4b17d6b6
A
697 if (ttd) {
698 if (ttd <= timer_deadline_tracking_bin_1)
699 thread->thread_timer_wakeups_bin_1++;
700 else
701 if (ttd <= timer_deadline_tracking_bin_2)
702 thread->thread_timer_wakeups_bin_2++;
703 }
39236c6e 704
4b17d6b6
A
705 if (pidle) {
706 ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1);
707 }
39236c6e 708
4b17d6b6
A
709 } else if (thread_get_tag_internal(cthread) & THREAD_TAG_CALLOUT) {
710 if (cthread->callout_woken_from_icontext) {
711 ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1);
712 thread->thread_callout_interrupt_wakeups++;
713 if (cthread->callout_woken_from_platform_idle) {
714 ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1);
715 thread->thread_callout_platform_idle_wakeups++;
716 }
39236c6e
A
717
718 cthread->callout_woke_thread = TRUE;
4b17d6b6
A
719 }
720 }
721
722 if (thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT) {
39236c6e
A
723 thread->callout_woken_from_icontext = aticontext;
724 thread->callout_woken_from_platform_idle = pidle;
725 thread->callout_woke_thread = FALSE;
4b17d6b6
A
726 }
727
fe8ab488
A
728 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
729 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
39037602
A
730 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result,
731 sched_run_buckets[TH_BUCKET_RUN], 0);
b0d623f7
A
732
733 DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
91447636 734
3e170ce0 735 return (ready_for_runq);
1c79356b
A
736}
737
738/*
91447636 739 * Routine: thread_go
1c79356b 740 * Purpose:
91447636 741 * Unblock and dispatch thread.
1c79356b
A
742 * Conditions:
743 * thread lock held, IPC locks may be held.
744 * thread must have been pulled from wait queue under same lock hold.
3e170ce0
A
745 * thread must have been waiting
746 * Returns:
9bccf70c 747 * KERN_SUCCESS - Thread was set running
3e170ce0
A
748 *
749 * TODO: This should return void
1c79356b 750 */
9bccf70c 751kern_return_t
91447636 752thread_go(
3e170ce0
A
753 thread_t thread,
754 wait_result_t wresult)
1c79356b 755{
39037602
A
756 assert_thread_magic(thread);
757
1c79356b 758 assert(thread->at_safe_point == FALSE);
9bccf70c 759 assert(thread->wait_event == NO_EVENT64);
3e170ce0 760 assert(thread->waitq == NULL);
1c79356b 761
3e170ce0
A
762 assert(!(thread->state & (TH_TERMINATE|TH_TERMINATE2)));
763 assert(thread->state & TH_WAIT);
55e303ae 764
55e303ae 765
39037602
A
766 if (thread_unblock(thread, wresult)) {
767#if SCHED_TRACE_THREAD_WAKEUPS
768 backtrace(&thread->thread_wakeup_bt[0],
769 (sizeof(thread->thread_wakeup_bt)/sizeof(uintptr_t)));
770#endif
3e170ce0 771 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
39037602 772 }
3e170ce0
A
773
774 return (KERN_SUCCESS);
1c79356b
A
775}
776
9bccf70c
A
777/*
778 * Routine: thread_mark_wait_locked
779 * Purpose:
780 * Mark a thread as waiting. If, given the circumstances,
781 * it doesn't want to wait (i.e. already aborted), then
782 * indicate that in the return value.
783 * Conditions:
784 * at splsched() and thread is locked.
785 */
786__private_extern__
787wait_result_t
1c79356b 788thread_mark_wait_locked(
9bccf70c
A
789 thread_t thread,
790 wait_interrupt_t interruptible)
1c79356b 791{
55e303ae 792 boolean_t at_safe_point;
1c79356b 793
3e170ce0 794 assert(!(thread->state & (TH_WAIT|TH_IDLE|TH_UNINT|TH_TERMINATE2)));
b0d623f7 795
9bccf70c
A
796 /*
797 * The thread may have certain types of interrupts/aborts masked
798 * off. Even if the wait location says these types of interrupts
799 * are OK, we have to honor mask settings (outer-scoped code may
800 * not be able to handle aborts at the moment).
801 */
91447636
A
802 if (interruptible > (thread->options & TH_OPT_INTMASK))
803 interruptible = thread->options & TH_OPT_INTMASK;
9bccf70c
A
804
805 at_safe_point = (interruptible == THREAD_ABORTSAFE);
806
55e303ae 807 if ( interruptible == THREAD_UNINT ||
6d2010ae 808 !(thread->sched_flags & TH_SFLAG_ABORT) ||
55e303ae 809 (!at_safe_point &&
6d2010ae 810 (thread->sched_flags & TH_SFLAG_ABORTSAFELY))) {
b0d623f7 811
316670eb
A
812 if ( !(thread->state & TH_TERMINATE))
813 DTRACE_SCHED(sleep);
b0d623f7 814
9bccf70c
A
815 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
816 thread->at_safe_point = at_safe_point;
813fb2f6
A
817
818 /* TODO: pass this through assert_wait instead, have
819 * assert_wait just take a struct as an argument */
820 assert(!thread->block_hint);
821 thread->block_hint = thread->pending_block_hint;
822 thread->pending_block_hint = kThreadWaitNone;
823
9bccf70c 824 return (thread->wait_result = THREAD_WAITING);
9bccf70c 825 }
55e303ae 826 else
6d2010ae
A
827 if (thread->sched_flags & TH_SFLAG_ABORTSAFELY)
828 thread->sched_flags &= ~TH_SFLAG_ABORTED_MASK;
813fb2f6 829 thread->pending_block_hint = kThreadWaitNone;
55e303ae 830
9bccf70c 831 return (thread->wait_result = THREAD_INTERRUPTED);
1c79356b
A
832}
833
9bccf70c
A
834/*
835 * Routine: thread_interrupt_level
836 * Purpose:
837 * Set the maximum interruptible state for the
838 * current thread. The effective value of any
839 * interruptible flag passed into assert_wait
840 * will never exceed this.
841 *
842 * Useful for code that must not be interrupted,
843 * but which calls code that doesn't know that.
844 * Returns:
845 * The old interrupt level for the thread.
846 */
847__private_extern__
848wait_interrupt_t
849thread_interrupt_level(
850 wait_interrupt_t new_level)
851{
852 thread_t thread = current_thread();
91447636 853 wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
1c79356b 854
91447636 855 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
1c79356b 856
91447636 857 return result;
1c79356b 858}
859
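/*
 * Illustrative usage sketch (not part of the original file): clamp the current
 * thread's wait interruptibility to THREAD_UNINT around a region that must not
 * be aborted, then restore it, as described in the header comment above.
 */
#if 0	/* example only */
	wait_interrupt_t saved = thread_interrupt_level(THREAD_UNINT);
	/* ... call code that may perform interruptible waits ... */
	thread_interrupt_level(saved);
#endif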
860/*
861 * Check to see if an assert wait is possible, without actually doing one.
862 * This is used by debug code in locks and elsewhere to verify that it is
863 * always OK to block when trying to take a blocking lock (since waiting
864 * for the actual assert_wait to catch the case may make it hard to detect
 865 * this case).
866 */
867boolean_t
868assert_wait_possible(void)
869{
870
871 thread_t thread;
1c79356b
A
872
873#if DEBUG
874 if(debug_mode) return TRUE; /* Always succeed in debug mode */
875#endif
876
877 thread = current_thread();
878
3e170ce0 879 return (thread == NULL || waitq_wait_possible(thread));
1c79356b
A
880}
881
882/*
883 * assert_wait:
884 *
885 * Assert that the current thread is about to go to
886 * sleep until the specified event occurs.
887 */
9bccf70c 888wait_result_t
1c79356b
A
889assert_wait(
890 event_t event,
9bccf70c 891 wait_interrupt_t interruptible)
1c79356b 892{
3e170ce0
A
893 if (__improbable(event == NO_EVENT))
894 panic("%s() called with NO_EVENT", __func__);
1c79356b 895
316670eb
A
896 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
897 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
4bd07ac2 898 VM_KERNEL_UNSLIDE_OR_PERM(event), 0, 0, 0, 0);
316670eb 899
3e170ce0 900 struct waitq *waitq;
901 waitq = global_eventq(event);
902 return waitq_assert_wait64(waitq, CAST_EVENT64_T(event), interruptible, TIMEOUT_WAIT_FOREVER);
9bccf70c 903}
904
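/*
 * Illustrative usage sketch (not part of the original file): the canonical
 * event-wait protocol built from these primitives. "my_lock" and "my_flag"
 * are hypothetical caller-owned state; real callers often use wrappers such
 * as lck_mtx_sleep() instead of open-coding this.
 */
#if 0	/* example only */
	/* Waiter: re-check the condition, assert the wait, drop the lock, block. */
	lck_mtx_lock(my_lock);
	while (!my_flag) {
		assert_wait((event_t)&my_flag, THREAD_UNINT);
		lck_mtx_unlock(my_lock);
		(void) thread_block(THREAD_CONTINUE_NULL);
		lck_mtx_lock(my_lock);
	}
	lck_mtx_unlock(my_lock);

	/* Waker: change the condition, then wake the threads waiting on the event. */
	lck_mtx_lock(my_lock);
	my_flag = TRUE;
	thread_wakeup((event_t)&my_flag);
	lck_mtx_unlock(my_lock);
#endif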
39037602 905/*
906 * assert_wait_queue:
907 *
908 * Return the global waitq for the specified event
909 */
910struct waitq *
911assert_wait_queue(
912 event_t event)
913{
914 return global_eventq(event);
915}
916
91447636
A
917wait_result_t
918assert_wait_timeout(
919 event_t event,
920 wait_interrupt_t interruptible,
921 uint32_t interval,
922 uint32_t scale_factor)
55e303ae 923{
91447636
A
924 thread_t thread = current_thread();
925 wait_result_t wresult;
91447636
A
926 uint64_t deadline;
927 spl_t s;
928
3e170ce0
A
929 if (__improbable(event == NO_EVENT))
930 panic("%s() called with NO_EVENT", __func__);
fe8ab488 931
3e170ce0
A
932 struct waitq *waitq;
933 waitq = global_eventq(event);
91447636
A
934
935 s = splsched();
3e170ce0 936 waitq_lock(waitq);
91447636
A
937
938 clock_interval_to_deadline(interval, scale_factor, &deadline);
3e170ce0 939
316670eb 940 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3e170ce0 941 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
4bd07ac2 942 VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
3e170ce0
A
943
944 wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
945 interruptible,
946 TIMEOUT_URGENCY_SYS_NORMAL,
947 deadline, TIMEOUT_NO_LEEWAY,
948 thread);
39236c6e 949
3e170ce0 950 waitq_unlock(waitq);
39236c6e 951 splx(s);
3e170ce0 952 return wresult;
39236c6e
A
953}
954
955wait_result_t
956assert_wait_timeout_with_leeway(
957 event_t event,
958 wait_interrupt_t interruptible,
959 wait_timeout_urgency_t urgency,
960 uint32_t interval,
961 uint32_t leeway,
962 uint32_t scale_factor)
963{
964 thread_t thread = current_thread();
965 wait_result_t wresult;
39236c6e
A
966 uint64_t deadline;
967 uint64_t abstime;
968 uint64_t slop;
969 uint64_t now;
970 spl_t s;
971
3e170ce0
A
972 if (__improbable(event == NO_EVENT))
973 panic("%s() called with NO_EVENT", __func__);
974
39236c6e
A
975 now = mach_absolute_time();
976 clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
977 deadline = now + abstime;
978
979 clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop);
980
3e170ce0
A
981 struct waitq *waitq;
982 waitq = global_eventq(event);
39236c6e
A
983
984 s = splsched();
3e170ce0 985 waitq_lock(waitq);
39236c6e
A
986
987 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3e170ce0 988 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
4bd07ac2 989 VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
3e170ce0
A
990
991 wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
992 interruptible,
993 urgency, deadline, slop,
994 thread);
91447636 995
3e170ce0 996 waitq_unlock(waitq);
91447636 997 splx(s);
3e170ce0 998 return wresult;
55e303ae
A
999}
1000
1001wait_result_t
91447636 1002assert_wait_deadline(
55e303ae 1003 event_t event,
91447636
A
1004 wait_interrupt_t interruptible,
1005 uint64_t deadline)
55e303ae
A
1006{
1007 thread_t thread = current_thread();
91447636 1008 wait_result_t wresult;
55e303ae
A
1009 spl_t s;
1010
3e170ce0
A
1011 if (__improbable(event == NO_EVENT))
1012 panic("%s() called with NO_EVENT", __func__);
1013
1014 struct waitq *waitq;
1015 waitq = global_eventq(event);
55e303ae
A
1016
1017 s = splsched();
3e170ce0 1018 waitq_lock(waitq);
55e303ae 1019
316670eb 1020 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3e170ce0 1021 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
4bd07ac2 1022 VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
39236c6e 1023
3e170ce0
A
1024 wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
1025 interruptible,
1026 TIMEOUT_URGENCY_SYS_NORMAL, deadline,
1027 TIMEOUT_NO_LEEWAY, thread);
3e170ce0 1028 waitq_unlock(waitq);
39236c6e 1029 splx(s);
3e170ce0 1030 return wresult;
39236c6e
A
1031}
1032
1033wait_result_t
1034assert_wait_deadline_with_leeway(
1035 event_t event,
1036 wait_interrupt_t interruptible,
1037 wait_timeout_urgency_t urgency,
1038 uint64_t deadline,
1039 uint64_t leeway)
1040{
1041 thread_t thread = current_thread();
1042 wait_result_t wresult;
39236c6e
A
1043 spl_t s;
1044
3e170ce0
A
1045 if (__improbable(event == NO_EVENT))
1046 panic("%s() called with NO_EVENT", __func__);
fe8ab488 1047
3e170ce0
A
1048 struct waitq *waitq;
1049 waitq = global_eventq(event);
39236c6e
A
1050
1051 s = splsched();
3e170ce0 1052 waitq_lock(waitq);
39236c6e
A
1053
1054 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3e170ce0 1055 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
4bd07ac2 1056 VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
39236c6e 1057
3e170ce0
A
1058 wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
1059 interruptible,
1060 urgency, deadline, leeway,
1061 thread);
3e170ce0 1062 waitq_unlock(waitq);
55e303ae 1063 splx(s);
3e170ce0 1064 return wresult;
55e303ae 1065}
9bccf70c 1066
39236c6e 1067/*
1068 * thread_isoncpu:
1069 *
1070 * Return TRUE if a thread is running on a processor such that an AST
1071 * is needed to pull it out of userspace execution, or if executing in
 1072 * the kernel, to bring it to a context switch boundary that would cause
1073 * thread state to be serialized in the thread PCB.
1074 *
1075 * Thread locked, returns the same way. While locked, fields
fe8ab488 1076 * like "state" cannot change. "runq" can change only from set to unset.
39236c6e 1077 */
1078static inline boolean_t
1079thread_isoncpu(thread_t thread)
1080{
1081 /* Not running or runnable */
1082 if (!(thread->state & TH_RUN))
1083 return (FALSE);
1084
1085 /* Waiting on a runqueue, not currently running */
fe8ab488 1086 /* TODO: This is invalid - it can get dequeued without thread lock, but not context switched. */
39236c6e
A
1087 if (thread->runq != PROCESSOR_NULL)
1088 return (FALSE);
1089
3e170ce0
A
1090 /*
1091 * Thread does not have a stack yet
1092 * It could be on the stack alloc queue or preparing to be invoked
1093 */
1094 if (!thread->kernel_stack)
1095 return (FALSE);
1096
39236c6e
A
1097 /*
1098 * Thread must be running on a processor, or
1099 * about to run, or just did run. In all these
1100 * cases, an AST to the processor is needed
1101 * to guarantee that the thread is kicked out
1102 * of userspace and the processor has
1103 * context switched (and saved register state).
1104 */
1105 return (TRUE);
1106}
1107
1c79356b 1108/*
91447636 1109 * thread_stop:
1c79356b 1110 *
91447636 1111 * Force a preemption point for a thread and wait
39236c6e
A
1112 * for it to stop running on a CPU. If a stronger
1113 * guarantee is requested, wait until no longer
1114 * runnable. Arbitrates access among
91447636 1115 * multiple stop requests. (released by unstop)
1c79356b 1116 *
91447636
A
1117 * The thread must enter a wait state and stop via a
1118 * separate means.
1c79356b 1119 *
91447636 1120 * Returns FALSE if interrupted.
1c79356b
A
1121 */
1122boolean_t
1123thread_stop(
39236c6e
A
1124 thread_t thread,
1125 boolean_t until_not_runnable)
1c79356b 1126{
91447636 1127 wait_result_t wresult;
2d21ac55 1128 spl_t s = splsched();
39236c6e 1129 boolean_t oncpu;
1c79356b 1130
1c79356b 1131 wake_lock(thread);
2d21ac55 1132 thread_lock(thread);
1c79356b
A
1133
1134 while (thread->state & TH_SUSP) {
1135 thread->wake_active = TRUE;
2d21ac55
A
1136 thread_unlock(thread);
1137
91447636 1138 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
1c79356b
A
1139 wake_unlock(thread);
1140 splx(s);
1141
91447636
A
1142 if (wresult == THREAD_WAITING)
1143 wresult = thread_block(THREAD_CONTINUE_NULL);
9bccf70c 1144
91447636 1145 if (wresult != THREAD_AWAKENED)
1c79356b
A
1146 return (FALSE);
1147
1148 s = splsched();
1149 wake_lock(thread);
2d21ac55 1150 thread_lock(thread);
1c79356b 1151 }
9bccf70c 1152
1c79356b 1153 thread->state |= TH_SUSP;
1c79356b 1154
39236c6e
A
1155 while ((oncpu = thread_isoncpu(thread)) ||
1156 (until_not_runnable && (thread->state & TH_RUN))) {
1157 processor_t processor;
1158
1159 if (oncpu) {
1160 assert(thread->state & TH_RUN);
1161 processor = thread->chosen_processor;
9bccf70c 1162 cause_ast_check(processor);
39236c6e 1163 }
9bccf70c
A
1164
1165 thread->wake_active = TRUE;
2d21ac55
A
1166 thread_unlock(thread);
1167
91447636 1168 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
9bccf70c
A
1169 wake_unlock(thread);
1170 splx(s);
1171
91447636
A
1172 if (wresult == THREAD_WAITING)
1173 wresult = thread_block(THREAD_CONTINUE_NULL);
9bccf70c 1174
91447636 1175 if (wresult != THREAD_AWAKENED) {
9bccf70c
A
1176 thread_unstop(thread);
1177 return (FALSE);
1178 }
1179
1180 s = splsched();
1181 wake_lock(thread);
1182 thread_lock(thread);
1183 }
1184
1185 thread_unlock(thread);
1c79356b
A
1186 wake_unlock(thread);
1187 splx(s);
39236c6e
A
1188
1189 /*
1190 * We return with the thread unlocked. To prevent it from
1191 * transitioning to a runnable state (or from TH_RUN to
1192 * being on the CPU), the caller must ensure the thread
1193 * is stopped via an external means (such as an AST)
1194 */
1c79356b
A
1195
1196 return (TRUE);
1197}
1198
1199/*
91447636
A
1200 * thread_unstop:
1201 *
1202 * Release a previous stop request and set
1203 * the thread running if appropriate.
1204 *
1205 * Use only after a successful stop operation.
1c79356b
A
1206 */
1207void
1208thread_unstop(
9bccf70c 1209 thread_t thread)
1c79356b 1210{
9bccf70c 1211 spl_t s = splsched();
1c79356b 1212
1c79356b
A
1213 wake_lock(thread);
1214 thread_lock(thread);
1215
3e170ce0 1216 assert((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) != TH_SUSP);
55e303ae 1217
1c79356b
A
1218 if (thread->state & TH_SUSP) {
1219 thread->state &= ~TH_SUSP;
1220
1221 if (thread->wake_active) {
1222 thread->wake_active = FALSE;
1223 thread_unlock(thread);
2d21ac55
A
1224
1225 thread_wakeup(&thread->wake_active);
1c79356b
A
1226 wake_unlock(thread);
1227 splx(s);
1c79356b
A
1228
1229 return;
1230 }
1231 }
1232
1233 thread_unlock(thread);
1234 wake_unlock(thread);
1235 splx(s);
1236}
1237
1238/*
91447636
A
1239 * thread_wait:
1240 *
1241 * Wait for a thread to stop running. (non-interruptible)
1242 *
1c79356b 1243 */
91447636 1244void
1c79356b 1245thread_wait(
316670eb
A
1246 thread_t thread,
1247 boolean_t until_not_runnable)
1c79356b 1248{
91447636 1249 wait_result_t wresult;
316670eb
A
1250 boolean_t oncpu;
1251 processor_t processor;
1252 spl_t s = splsched();
1c79356b 1253
1c79356b 1254 wake_lock(thread);
9bccf70c 1255 thread_lock(thread);
1c79356b 1256
316670eb
A
1257 /*
1258 * Wait until not running on a CPU. If stronger requirement
1259 * desired, wait until not runnable. Assumption: if thread is
1260 * on CPU, then TH_RUN is set, so we're not waiting in any case
1261 * where the original, pure "TH_RUN" check would have let us
1262 * finish.
1263 */
39236c6e 1264 while ((oncpu = thread_isoncpu(thread)) ||
316670eb 1265 (until_not_runnable && (thread->state & TH_RUN))) {
e7c99d92 1266
316670eb
A
1267 if (oncpu) {
1268 assert(thread->state & TH_RUN);
39236c6e 1269 processor = thread->chosen_processor;
9bccf70c 1270 cause_ast_check(processor);
316670eb 1271 }
1c79356b
A
1272
1273 thread->wake_active = TRUE;
2d21ac55
A
1274 thread_unlock(thread);
1275
91447636 1276 wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
1c79356b
A
1277 wake_unlock(thread);
1278 splx(s);
1279
91447636
A
1280 if (wresult == THREAD_WAITING)
1281 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
1282
1283 s = splsched();
1284 wake_lock(thread);
9bccf70c 1285 thread_lock(thread);
1c79356b 1286 }
0b4e3aa0 1287
9bccf70c 1288 thread_unlock(thread);
1c79356b
A
1289 wake_unlock(thread);
1290 splx(s);
1c79356b
A
1291}
1292
1c79356b
A
1293/*
1294 * Routine: clear_wait_internal
1295 *
1296 * Clear the wait condition for the specified thread.
1297 * Start the thread executing if that is appropriate.
1298 * Arguments:
1299 * thread thread to awaken
1300 * result Wakeup result the thread should see
1301 * Conditions:
1302 * At splsched
1303 * the thread is locked.
9bccf70c 1304 * Returns:
 1305 * KERN_SUCCESS thread was rousted out of a wait
1306 * KERN_FAILURE thread was waiting but could not be rousted
1307 * KERN_NOT_WAITING thread was not waiting
1c79356b 1308 */
9bccf70c 1309__private_extern__ kern_return_t
1c79356b 1310clear_wait_internal(
9bccf70c 1311 thread_t thread,
55e303ae 1312 wait_result_t wresult)
1c79356b 1313{
39037602 1314 uint32_t i = LockTimeOutUsec;
3e170ce0 1315 struct waitq *waitq = thread->waitq;
39037602 1316
9bccf70c 1317 do {
55e303ae
A
1318 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1319 return (KERN_FAILURE);
9bccf70c 1320
3e170ce0 1321 if (waitq != NULL) {
39037602 1322 if (!waitq_pull_thread_locked(waitq, thread)) {
9bccf70c
A
1323 thread_unlock(thread);
1324 delay(1);
39037602
A
1325 if (i > 0 && !machine_timeout_suspended())
1326 i--;
9bccf70c 1327 thread_lock(thread);
3e170ce0
A
1328 if (waitq != thread->waitq)
1329 return KERN_NOT_WAITING;
9bccf70c
A
1330 continue;
1331 }
1c79356b 1332 }
55e303ae 1333
3e170ce0
A
1334 /* TODO: Can we instead assert TH_TERMINATE is not set? */
1335 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT)
1336 return (thread_go(thread, wresult));
1337 else
1338 return (KERN_NOT_WAITING);
39037602 1339 } while (i > 0);
55e303ae 1340
2d21ac55 1341 panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
3e170ce0 1342 thread, waitq, cpu_number());
55e303ae
A
1343
1344 return (KERN_FAILURE);
1c79356b
A
1345}
1346
1347
1348/*
1349 * clear_wait:
1350 *
1351 * Clear the wait condition for the specified thread. Start the thread
1352 * executing if that is appropriate.
1353 *
1354 * parameters:
1355 * thread thread to awaken
1356 * result Wakeup result the thread should see
1357 */
9bccf70c 1358kern_return_t
1c79356b 1359clear_wait(
9bccf70c
A
1360 thread_t thread,
1361 wait_result_t result)
1c79356b 1362{
9bccf70c 1363 kern_return_t ret;
1c79356b
A
1364 spl_t s;
1365
1366 s = splsched();
1367 thread_lock(thread);
9bccf70c 1368 ret = clear_wait_internal(thread, result);
1c79356b
A
1369 thread_unlock(thread);
1370 splx(s);
9bccf70c 1371 return ret;
1c79356b
A
1372}
1373
1374
1375/*
1376 * thread_wakeup_prim:
1377 *
1378 * Common routine for thread_wakeup, thread_wakeup_with_result,
1379 * and thread_wakeup_one.
1380 *
1381 */
9bccf70c 1382kern_return_t
1c79356b 1383thread_wakeup_prim(
39037602
A
1384 event_t event,
1385 boolean_t one_thread,
1386 wait_result_t result)
6d2010ae 1387{
39037602
A
1388 if (__improbable(event == NO_EVENT))
1389 panic("%s() called with NO_EVENT", __func__);
1390
1391 struct waitq *wq = global_eventq(event);
1392
1393 if (one_thread)
1394 return waitq_wakeup64_one(wq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
1395 else
1396 return waitq_wakeup64_all(wq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
6d2010ae
A
1397}
1398
39037602
A
1399/*
1400 * Wakeup a specified thread if and only if it's waiting for this event
1401 */
1402kern_return_t
1403thread_wakeup_thread(
1404 event_t event,
1405 thread_t thread)
1406{
1407 if (__improbable(event == NO_EVENT))
1408 panic("%s() called with NO_EVENT", __func__);
1409
1410 struct waitq *wq = global_eventq(event);
1411
1412 return waitq_wakeup64_thread(wq, CAST_EVENT64_T(event), thread, THREAD_AWAKENED);
1413}
6d2010ae 1414
39037602
A
1415/*
1416 * Wakeup a thread waiting on an event and promote it to a priority.
1417 *
1418 * Requires woken thread to un-promote itself when done.
1419 */
6d2010ae 1420kern_return_t
39037602
A
1421thread_wakeup_one_with_pri(
1422 event_t event,
1423 int priority)
1c79356b 1424{
3e170ce0
A
1425 if (__improbable(event == NO_EVENT))
1426 panic("%s() called with NO_EVENT", __func__);
1427
39037602 1428 struct waitq *wq = global_eventq(event);
1c79356b 1429
39037602
A
1430 return waitq_wakeup64_one(wq, CAST_EVENT64_T(event), THREAD_AWAKENED, priority);
1431}
fe8ab488 1432
39037602
A
1433/*
1434 * Wakeup a thread waiting on an event,
1435 * promote it to a priority,
1436 * and return a reference to the woken thread.
1437 *
1438 * Requires woken thread to un-promote itself when done.
1439 */
1440thread_t
1441thread_wakeup_identify(event_t event,
1442 int priority)
1443{
1444 if (__improbable(event == NO_EVENT))
1445 panic("%s() called with NO_EVENT", __func__);
1446
1447 struct waitq *wq = global_eventq(event);
1448
1449 return waitq_wakeup64_identify(wq, CAST_EVENT64_T(event), THREAD_AWAKENED, priority);
1c79356b
A
1450}
1451
1452/*
1453 * thread_bind:
1454 *
2d21ac55 1455 * Force the current thread to execute on the specified processor.
fe8ab488 1456 * Takes effect after the next thread_block().
1c79356b 1457 *
55e303ae
A
1458 * Returns the previous binding. PROCESSOR_NULL means
1459 * not bound.
1460 *
1461 * XXX - DO NOT export this to users - XXX
1c79356b 1462 */
55e303ae 1463processor_t
1c79356b 1464thread_bind(
2d21ac55 1465 processor_t processor)
1c79356b 1466{
2d21ac55 1467 thread_t self = current_thread();
55e303ae 1468 processor_t prev;
55e303ae 1469 spl_t s;
1c79356b
A
1470
1471 s = splsched();
2d21ac55 1472 thread_lock(self);
55e303ae 1473
3e170ce0 1474 prev = thread_bind_internal(self, processor);
55e303ae 1475
2d21ac55 1476 thread_unlock(self);
1c79356b 1477 splx(s);
55e303ae
A
1478
1479 return (prev);
1c79356b 1480}
1481
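/*
 * Illustrative usage sketch (not part of the original file): pin the calling
 * thread to the master processor and later undo the binding; the same pattern
 * appears in thread_vm_bind_group_add() below. The thread_block() after each
 * bind is what actually migrates the thread.
 */
#if 0	/* example only */
	processor_t prev = thread_bind(master_processor);
	thread_block(THREAD_CONTINUE_NULL);	/* switch onto the bound processor */
	/* ... work that must run on the master processor ... */
	thread_bind(prev);			/* restore the previous binding */
	thread_block(THREAD_CONTINUE_NULL);
#endif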
3e170ce0 1482/*
1483 * thread_bind_internal:
1484 *
1485 * If the specified thread is not the current thread, and it is currently
1486 * running on another CPU, a remote AST must be sent to that CPU to cause
1487 * the thread to migrate to its bound processor. Otherwise, the migration
1488 * will occur at the next quantum expiration or blocking point.
1489 *
 1490 * When the thread is the current thread, an explicit thread_block() should
1491 * be used to force the current processor to context switch away and
1492 * let the thread migrate to the bound processor.
1493 *
1494 * Thread must be locked, and at splsched.
1495 */
1496
1497static processor_t
1498thread_bind_internal(
1499 thread_t thread,
1500 processor_t processor)
1501{
1502 processor_t prev;
1503
1504 /* <rdar://problem/15102234> */
1505 assert(thread->sched_pri < BASEPRI_RTQUEUES);
1506 /* A thread can't be bound if it's sitting on a (potentially incorrect) runqueue */
1507 assert(thread->runq == PROCESSOR_NULL);
1508
1509 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_THREAD_BIND), thread_tid(thread), processor ? (uintptr_t)processor->cpu_id : (uintptr_t)-1, 0, 0, 0);
1510
1511 prev = thread->bound_processor;
1512 thread->bound_processor = processor;
1513
1514 return (prev);
1515}
1516
1517/*
1518 * thread_vm_bind_group_add:
1519 *
1520 * The "VM bind group" is a special mechanism to mark a collection
1521 * of threads from the VM subsystem that, in general, should be scheduled
1522 * with only one CPU of parallelism. To accomplish this, we initially
1523 * bind all the threads to the master processor, which has the effect
1524 * that only one of the threads in the group can execute at once, including
1525 * preempting threads in the group that are a lower priority. Future
1526 * mechanisms may use more dynamic mechanisms to prevent the collection
1527 * of VM threads from using more CPU time than desired.
1528 *
1529 * The current implementation can result in priority inversions where
1530 * compute-bound priority 95 or realtime threads that happen to have
1531 * landed on the master processor prevent the VM threads from running.
1532 * When this situation is detected, we unbind the threads for one
 1533 * scheduler tick to allow the scheduler to run the threads on
1534 * additional CPUs, before restoring the binding (assuming high latency
1535 * is no longer a problem).
1536 */
1537
1538/*
1539 * The current max is provisioned for:
1540 * vm_compressor_swap_trigger_thread (92)
1541 * 2 x vm_pageout_iothread_internal (92) when vm_restricted_to_single_processor==TRUE
1542 * vm_pageout_continue (92)
1543 * memorystatus_thread (95)
1544 */
1545#define MAX_VM_BIND_GROUP_COUNT (5)
1546decl_simple_lock_data(static,sched_vm_group_list_lock);
1547static thread_t sched_vm_group_thread_list[MAX_VM_BIND_GROUP_COUNT];
1548static int sched_vm_group_thread_count;
1549static boolean_t sched_vm_group_temporarily_unbound = FALSE;
1550
1551void
1552thread_vm_bind_group_add(void)
1553{
1554 thread_t self = current_thread();
1555
1556 thread_reference_internal(self);
1557 self->options |= TH_OPT_SCHED_VM_GROUP;
1558
1559 simple_lock(&sched_vm_group_list_lock);
1560 assert(sched_vm_group_thread_count < MAX_VM_BIND_GROUP_COUNT);
1561 sched_vm_group_thread_list[sched_vm_group_thread_count++] = self;
1562 simple_unlock(&sched_vm_group_list_lock);
1563
1564 thread_bind(master_processor);
1565
1566 /* Switch to bound processor if not already there */
1567 thread_block(THREAD_CONTINUE_NULL);
1568}
1569
1570static void
1571sched_vm_group_maintenance(void)
1572{
1573 uint64_t ctime = mach_absolute_time();
1574 uint64_t longtime = ctime - sched_tick_interval;
1575 int i;
1576 spl_t s;
1577 boolean_t high_latency_observed = FALSE;
1578 boolean_t runnable_and_not_on_runq_observed = FALSE;
1579 boolean_t bind_target_changed = FALSE;
1580 processor_t bind_target = PROCESSOR_NULL;
1581
1582 /* Make sure nobody attempts to add new threads while we are enumerating them */
1583 simple_lock(&sched_vm_group_list_lock);
1584
1585 s = splsched();
1586
1587 for (i=0; i < sched_vm_group_thread_count; i++) {
1588 thread_t thread = sched_vm_group_thread_list[i];
1589 assert(thread != THREAD_NULL);
1590 thread_lock(thread);
1591 if ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN) {
1592 if (thread->runq != PROCESSOR_NULL && thread->last_made_runnable_time < longtime) {
1593 high_latency_observed = TRUE;
1594 } else if (thread->runq == PROCESSOR_NULL) {
 1595 /* There are some cases where a thread may be transitioning that also fall into this case */
1596 runnable_and_not_on_runq_observed = TRUE;
1597 }
1598 }
1599 thread_unlock(thread);
1600
1601 if (high_latency_observed && runnable_and_not_on_runq_observed) {
1602 /* All the things we are looking for are true, stop looking */
1603 break;
1604 }
1605 }
1606
1607 splx(s);
1608
1609 if (sched_vm_group_temporarily_unbound) {
1610 /* If we turned off binding, make sure everything is OK before rebinding */
1611 if (!high_latency_observed) {
1612 /* rebind */
1613 bind_target_changed = TRUE;
1614 bind_target = master_processor;
1615 sched_vm_group_temporarily_unbound = FALSE; /* might be reset to TRUE if change cannot be completed */
1616 }
1617 } else {
1618 /*
1619 * Check if we're in a bad state, which is defined by high
1620 * latency with no core currently executing a thread. If a
1621 * single thread is making progress on a CPU, that means the
1622 * binding concept to reduce parallelism is working as
1623 * designed.
1624 */
1625 if (high_latency_observed && !runnable_and_not_on_runq_observed) {
1626 /* unbind */
1627 bind_target_changed = TRUE;
1628 bind_target = PROCESSOR_NULL;
1629 sched_vm_group_temporarily_unbound = TRUE;
1630 }
1631 }
1632
1633 if (bind_target_changed) {
1634 s = splsched();
1635 for (i=0; i < sched_vm_group_thread_count; i++) {
1636 thread_t thread = sched_vm_group_thread_list[i];
1637 boolean_t removed;
1638 assert(thread != THREAD_NULL);
1639
1640 thread_lock(thread);
1641 removed = thread_run_queue_remove(thread);
1642 if (removed || ((thread->state & (TH_RUN | TH_WAIT)) == TH_WAIT)) {
1643 thread_bind_internal(thread, bind_target);
1644 } else {
1645 /*
1646 * Thread was in the middle of being context-switched-to,
1647 * or was in the process of blocking. To avoid switching the bind
1648 * state out mid-flight, defer the change if possible.
1649 */
1650 if (bind_target == PROCESSOR_NULL) {
1651 thread_bind_internal(thread, bind_target);
1652 } else {
1653 sched_vm_group_temporarily_unbound = TRUE; /* next pass will try again */
1654 }
1655 }
1656
1657 if (removed) {
1658 thread_run_queue_reinsert(thread, SCHED_PREEMPT | SCHED_TAILQ);
1659 }
1660 thread_unlock(thread);
1661 }
1662 splx(s);
1663 }
1664
1665 simple_unlock(&sched_vm_group_list_lock);
1666}
1667
fe8ab488
A
1668/* Invoked prior to idle entry to determine if, on SMT capable processors, an SMT
1669 * rebalancing opportunity exists when a core is (instantaneously) idle, but
1670 * other SMT-capable cores may be over-committed. TODO: some possible negatives:
1671 * IPI thrash if this core does not remain idle following the load balancing ASTs
1672 * Idle "thrash", when IPI issue is followed by idle entry/core power down
1673 * followed by a wakeup shortly thereafter.
1674 */
1675
fe8ab488
A
1676#if (DEVELOPMENT || DEBUG)
1677int sched_smt_balance = 1;
1678#endif
1679
3e170ce0
A
1680#if __SMP__
1681/* Invoked with pset locked, returns with pset unlocked */
fe8ab488
A
1682static void
1683sched_SMT_balance(processor_t cprocessor, processor_set_t cpset) {
1684 processor_t ast_processor = NULL;
1685
1686#if (DEVELOPMENT || DEBUG)
1687 if (__improbable(sched_smt_balance == 0))
1688 goto smt_balance_exit;
1689#endif
1690
1691 assert(cprocessor == current_processor());
1692 if (cprocessor->is_SMT == FALSE)
1693 goto smt_balance_exit;
1694
1695 processor_t sib_processor = cprocessor->processor_secondary ? cprocessor->processor_secondary : cprocessor->processor_primary;
1696
1697 /* Determine if both this processor and its sibling are idle,
1698 * indicating an SMT rebalancing opportunity.
1699 */
1700 if (sib_processor->state != PROCESSOR_IDLE)
1701 goto smt_balance_exit;
1702
1703 processor_t sprocessor;
1704
39037602 1705 qe_foreach_element(sprocessor, &cpset->active_queue, processor_queue) {
fe8ab488
A
1706 if ((sprocessor->state == PROCESSOR_RUNNING) &&
1707 (sprocessor->processor_primary != sprocessor) &&
1708 (sprocessor->processor_primary->state == PROCESSOR_RUNNING) &&
1709 (sprocessor->current_pri < BASEPRI_RTQUEUES) &&
3e170ce0 1710 ((cpset->pending_AST_cpu_mask & (1ULL << sprocessor->cpu_id)) == 0)) {
fe8ab488
A
1711 assert(sprocessor != cprocessor);
1712 ast_processor = sprocessor;
1713 break;
1714 }
fe8ab488
A
1715 }
1716
1717smt_balance_exit:
1718 pset_unlock(cpset);
1719
1720 if (ast_processor) {
1721 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_SMT_BALANCE), ast_processor->cpu_id, ast_processor->state, ast_processor->processor_primary->state, 0, 0);
1722 cause_ast_check(ast_processor);
1723 }
1724}
3e170ce0 1725#endif /* __SMP__ */
fe8ab488 1726
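/*
 * In short: when this physical core goes fully idle, sched_SMT_balance()
 * scans for a core whose primary and secondary are both RUNNING non-realtime
 * work and, if one is found with no AST already pending, pokes its secondary
 * with cause_ast_check() so the thread running there can be reconsidered for
 * placement, ideally landing on this now-idle core.
 */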
1c79356b 1727/*
2d21ac55
A
1728 * thread_select:
1729 *
1730 * Select a new thread for the current processor to execute.
55e303ae
A
1731 *
1732 * May select the current thread, which must be locked.
1c79356b 1733 */
2d21ac55 1734static thread_t
1c79356b 1735thread_select(
2d21ac55 1736 thread_t thread,
fe8ab488
A
1737 processor_t processor,
1738 ast_t reason)
1c79356b 1739{
2d21ac55 1740 processor_set_t pset = processor->processor_set;
cf7d32b8 1741 thread_t new_thread = THREAD_NULL;
1c79356b 1742
6d2010ae 1743 assert(processor == current_processor());
3e170ce0 1744 assert((thread->state & (TH_RUN|TH_TERMINATE2)) == TH_RUN);
6d2010ae 1745
2d21ac55
A
1746 do {
1747 /*
1748 * Update the priority.
1749 */
6d2010ae
A
1750 if (SCHED(can_update_priority)(thread))
1751 SCHED(update_priority)(thread);
1752
2d21ac55 1753 processor->current_pri = thread->sched_pri;
6d2010ae 1754 processor->current_thmode = thread->sched_mode;
fe8ab488 1755 processor->current_sfi_class = thread->sfi_class;
1c79356b 1756
2d21ac55
A
1757 pset_lock(pset);
1758
fe8ab488 1759 assert(processor->state != PROCESSOR_OFF_LINE);
6d2010ae 1760
3e170ce0
A
1761 if (!processor->is_recommended) {
1762 /*
1763 * The performance controller has provided a hint to not dispatch more threads,
1764 * unless they are bound to us (and thus we are the only option).
1765 */
1766 if (!SCHED(processor_bound_count)(processor)) {
1767 goto idle;
1768 }
1769 } else if (processor->processor_primary != processor) {
39236c6e
A
1770 /*
1771 * Should this secondary SMT processor attempt to find work? For pset runqueue systems,
1772 * we should look for work only under the same conditions that choose_processor()
1773 * would have assigned work, which is when all primary processors have been assigned work.
1774 *
1775 * An exception is that bound threads are dispatched to a processor without going through
1776 * choose_processor(), so in those cases we should continue trying to dequeue work.
1777 */
fe8ab488 1778 if (!SCHED(processor_bound_count)(processor) && !queue_empty(&pset->idle_queue) && !rt_runq.count) {
39236c6e
A
1779 goto idle;
1780 }
1781 }
1782
3e170ce0 1783 rt_lock_lock();
2d21ac55 1784
2d21ac55
A
1785 /*
1786 * Test to see if the current thread should continue
3e170ce0 1787 * to run on this processor. Must not be attempting to wait, and not
2d21ac55 1788 * bound to a different processor, nor be in the wrong
3e170ce0
A
1789 * processor set, nor be forced to context switch by TH_SUSP.
1790 *
1791 * Note that there are never any RT threads in the regular runqueue.
1792 *
1793 * This code is extremely tricky; modify it with care.
2d21ac55 1794 */
3e170ce0
A
1795
1796 if (((thread->state & (TH_TERMINATE|TH_IDLE|TH_WAIT|TH_RUN|TH_SUSP)) == TH_RUN) &&
fe8ab488
A
1797 (thread->sched_pri >= BASEPRI_RTQUEUES || processor->processor_primary == processor) &&
1798 (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor) &&
1799 (thread->affinity_set == AFFINITY_SET_NULL || thread->affinity_set->aset_pset == pset)) {
3e170ce0
A
1800 /*
1801 * RT threads with un-expired quantum stay on processor,
1802 * unless there's a valid RT thread with an earlier deadline.
1803 */
1804 if (thread->sched_pri >= BASEPRI_RTQUEUES && processor->first_timeslice) {
6d2010ae 1805 if (rt_runq.count > 0) {
39037602 1806 thread_t next_rt = qe_queue_first(&rt_runq.queue, struct thread, runq_links);
3e170ce0
A
1807
1808 assert(next_rt->runq == THREAD_ON_RT_RUNQ);
1809
fe8ab488 1810 if (next_rt->realtime.deadline < processor->deadline &&
3e170ce0
A
1811 (next_rt->bound_processor == PROCESSOR_NULL ||
1812 next_rt->bound_processor == processor)) {
1813 /* The next RT thread is better, so pick it off the runqueue. */
1814 goto pick_new_rt_thread;
55e303ae
A
1815 }
1816 }
2d21ac55 1817
3e170ce0 1818 /* This is still the best RT thread to run. */
2d21ac55
A
1819 processor->deadline = thread->realtime.deadline;
1820
3e170ce0 1821 rt_lock_unlock();
2d21ac55
A
1822 pset_unlock(pset);
1823
1824 return (thread);
55e303ae
A
1825 }
1826
3e170ce0
A
1827 if ((rt_runq.count == 0) &&
1828 SCHED(processor_queue_has_priority)(processor, thread->sched_pri, TRUE) == FALSE) {
fe8ab488 1829 /* This thread is still the highest priority runnable (non-idle) thread */
2d21ac55 1830 processor->deadline = UINT64_MAX;
55e303ae 1831
3e170ce0 1832 rt_lock_unlock();
2d21ac55 1833 pset_unlock(pset);
55e303ae 1834
2d21ac55
A
1835 return (thread);
1836 }
1837 }
1838
3e170ce0
A
1839 /* OK, so we're not going to run the current thread. Look at the RT queue. */
1840 if (rt_runq.count > 0) {
39037602 1841 thread_t next_rt = qe_queue_first(&rt_runq.queue, struct thread, runq_links);
c910b4d9 1842
3e170ce0 1843 assert(next_rt->runq == THREAD_ON_RT_RUNQ);
fe8ab488 1844
3e170ce0
A
1845 if (__probable((next_rt->bound_processor == PROCESSOR_NULL ||
1846 (next_rt->bound_processor == processor)))) {
1847pick_new_rt_thread:
39037602 1848 new_thread = qe_dequeue_head(&rt_runq.queue, struct thread, runq_links);
6d2010ae 1849
3e170ce0 1850 new_thread->runq = PROCESSOR_NULL;
39236c6e
A
1851 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
1852 rt_runq.count--;
6d2010ae 1853
3e170ce0 1854 processor->deadline = new_thread->realtime.deadline;
c910b4d9 1855
3e170ce0 1856 rt_lock_unlock();
39236c6e 1857 pset_unlock(pset);
c910b4d9 1858
3e170ce0 1859 return (new_thread);
39236c6e 1860 }
c910b4d9 1861 }
2d21ac55 1862
3e170ce0
A
1863 processor->deadline = UINT64_MAX;
1864 rt_lock_unlock();
6d2010ae 1865
3e170ce0
A
1866 /* No RT threads, so let's look at the regular threads. */
1867 if ((new_thread = SCHED(choose_thread)(processor, MINPRI, reason)) != THREAD_NULL) {
6d2010ae 1868 pset_unlock(pset);
6d2010ae
A
1869 return (new_thread);
1870 }
c910b4d9 1871
3e170ce0
A
1872#if __SMP__
1873 if (SCHED(steal_thread_enabled)) {
1874 /*
1875 * No runnable threads, attempt to steal
1876 * from other processors. Returns with pset lock dropped.
1877 */
2d21ac55 1878
3e170ce0
A
1879 if ((new_thread = SCHED(steal_thread)(pset)) != THREAD_NULL) {
1880 return (new_thread);
1881 }
cf7d32b8 1882
3e170ce0
A
1883 /*
1884 * If other threads have appeared, shortcut
1885 * around again.
1886 */
1887 if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0)
1888 continue;
1889
1890 pset_lock(pset);
1891 }
1892#endif
55e303ae 1893
39236c6e 1894 idle:
1c79356b
A
1895 /*
1896 * Nothing is runnable, so set this processor idle if it
2d21ac55 1897 * was running.
1c79356b 1898 */
55e303ae 1899 if (processor->state == PROCESSOR_RUNNING) {
55e303ae 1900 processor->state = PROCESSOR_IDLE;
1c79356b 1901
fe8ab488 1902 if (processor->processor_primary == processor) {
39037602
A
1903 re_queue_head(&pset->idle_queue, &processor->processor_queue);
1904 } else {
1905 re_queue_head(&pset->idle_secondary_queue, &processor->processor_queue);
b0d623f7 1906 }
1c79356b 1907 }
1c79356b 1908
3e170ce0 1909#if __SMP__
fe8ab488
A
1910 /* Invoked with pset locked, returns with pset unlocked */
1911 sched_SMT_balance(processor, pset);
3e170ce0
A
1912#else
1913 pset_unlock(pset);
1914#endif
2d21ac55 1915
6d2010ae 1916#if CONFIG_SCHED_IDLE_IN_PLACE
2d21ac55
A
1917 /*
1918 * Choose idle thread if fast idle is not possible.
1919 */
fe8ab488
A
1920 if (processor->processor_primary != processor)
1921 return (processor->idle_thread);
1922
6d2010ae 1923 if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active || thread->sched_pri >= BASEPRI_RTQUEUES)
2d21ac55
A
1924 return (processor->idle_thread);
1925
1926 /*
1927 * Perform idling activities directly without a
1928 * context switch. Return dispatched thread,
1929 * else check again for a runnable thread.
1930 */
1931 new_thread = thread_select_idle(thread, processor);
1932
6d2010ae
A
1933#else /* !CONFIG_SCHED_IDLE_IN_PLACE */
1934
1935 /*
1936 * Do a full context switch to idle so that the current
1937 * thread can start running on another processor without
1938 * waiting for the fast-idled processor to wake up.
1939 */
3e170ce0 1940 new_thread = processor->idle_thread;
6d2010ae
A
1941
1942#endif /* !CONFIG_SCHED_IDLE_IN_PLACE */
1943
2d21ac55
A
1944 } while (new_thread == THREAD_NULL);
1945
1946 return (new_thread);
1947}
1948
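/*
 * Selection order implemented above, in brief:
 *   1. keep the current thread if it is still runnable here and remains the
 *      best choice (for realtime, no earlier-deadline RT thread is waiting;
 *      otherwise, nothing of higher priority is queued);
 *   2. else take the head of the realtime run queue (earliest deadline);
 *   3. else ask the scheduler policy via SCHED(choose_thread)();
 *   4. else, on SMP, try SCHED(steal_thread)() to pull work from another
 *      processor in the pset;
 *   5. else go idle, either in place (CONFIG_SCHED_IDLE_IN_PLACE) or by
 *      switching to the processor's dedicated idle thread.
 */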
6d2010ae 1949#if CONFIG_SCHED_IDLE_IN_PLACE
2d21ac55
A
1950/*
1951 * thread_select_idle:
1952 *
1953 * Idle the processor using the current thread context.
1954 *
1955 * Called with thread locked, then dropped and relocked.
1956 */
1957static thread_t
1958thread_select_idle(
1959 thread_t thread,
1960 processor_t processor)
1961{
1962 thread_t new_thread;
39236c6e
A
1963 uint64_t arg1, arg2;
1964 int urgency;
1965
fe8ab488 1966 sched_run_decr(thread);
2d21ac55
A
1967
1968 thread->state |= TH_IDLE;
1969 processor->current_pri = IDLEPRI;
6d2010ae 1970 processor->current_thmode = TH_MODE_NONE;
fe8ab488 1971 processor->current_sfi_class = SFI_CLASS_KERNEL;
2d21ac55 1972
316670eb
A
1973 /* Reload precise timing global policy to thread-local policy */
1974 thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
1975
2d21ac55
A
1976 thread_unlock(thread);
1977
1978 /*
1979 * Switch execution timing to processor idle thread.
1980 */
1981 processor->last_dispatch = mach_absolute_time();
fe8ab488
A
1982
1983#ifdef CONFIG_MACH_APPROXIMATE_TIME
1984 commpage_update_mach_approximate_time(processor->last_dispatch);
1985#endif
1986
6d2010ae 1987 thread->last_run_time = processor->last_dispatch;
2d21ac55
A
1988 thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
1989 PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;
1990
1991 /*
1992 * Cancel the quantum timer while idling.
1993 */
1994 timer_call_cancel(&processor->quantum_timer);
3e170ce0 1995 processor->first_timeslice = FALSE;
2d21ac55
A
1996
1997 (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
1998
3e170ce0 1999 thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, 0, NULL);
6d2010ae 2000
2d21ac55
A
2001 /*
2002 * Enable interrupts and perform idling activities. No
2003 * preemption due to TH_IDLE being set.
2004 */
2005 spllo(); new_thread = processor_idle(thread, processor);
2006
cf7d32b8
A
2007 /*
2008 * Return at splsched.
2009 */
2d21ac55
A
2010 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
2011
2012 thread_lock(thread);
2013
2014 /*
2015 * If awakened, switch to thread timer and start a new quantum.
2016 * Otherwise skip; we will context switch to another thread or return here.
2017 */
2018 if (!(thread->state & TH_WAIT)) {
2019 processor->last_dispatch = mach_absolute_time();
2020 thread_timer_event(processor->last_dispatch, &thread->system_timer);
2021 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
2022
2023 thread_quantum_init(thread);
fe8ab488
A
2024 processor->quantum_end = processor->last_dispatch + thread->quantum_remaining;
2025 timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
3e170ce0 2026 processor->first_timeslice = TRUE;
2d21ac55
A
2027
2028 thread->computation_epoch = processor->last_dispatch;
1c79356b
A
2029 }
2030
2d21ac55 2031 thread->state &= ~TH_IDLE;
55e303ae 2032
39236c6e
A
2033 urgency = thread_get_urgency(thread, &arg1, &arg2);
2034
3e170ce0 2035 thread_tell_urgency(urgency, arg1, arg2, 0, new_thread);
39236c6e 2036
fe8ab488 2037 sched_run_incr(thread);
39236c6e 2038
2d21ac55 2039 return (new_thread);
1c79356b 2040}
6d2010ae
A
2041#endif /* CONFIG_SCHED_IDLE_IN_PLACE */
2042
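/*
 * In short: "idle in place" lets the blocking thread's own context run
 * processor_idle() directly, so entering and leaving idle costs no context
 * switch.  The quantum timer is cancelled while idling; if the thread is
 * awakened, a fresh quantum is started, otherwise thread_select() continues
 * with whatever runnable thread processor_idle() returned.
 */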
b0d623f7 2043/*
3e170ce0 2044 * thread_invoke
b0d623f7 2045 *
3e170ce0 2046 * Called at splsched with neither thread locked.
b0d623f7 2047 *
3e170ce0 2048 * Perform a context switch and start executing the new thread.
55e303ae 2049 *
3e170ce0
A
2050 * Returns FALSE when the context switch didn't happen.
2051 * The reference to the new thread is still consumed.
39236c6e
A
2052 *
2053 * "self" is what is currently running on the processor,
2054 * "thread" is the new thread to context switch to
2055 * (which may be the same thread in some cases)
2056 */
2d21ac55 2057static boolean_t
1c79356b 2058thread_invoke(
39236c6e
A
2059 thread_t self,
2060 thread_t thread,
91447636 2061 ast_t reason)
1c79356b 2062{
39236c6e 2063 if (__improbable(get_preemption_level() != 0)) {
b0d623f7
A
2064 int pl = get_preemption_level();
2065 panic("thread_invoke: preemption_level %d, possible cause: %s",
2066 pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" :
2067 "blocking while holding a spinlock, or within interrupt context"));
2068 }
0b4e3aa0 2069
3e170ce0
A
2070 thread_continue_t continuation = self->continuation;
2071 void *parameter = self->parameter;
2072 processor_t processor;
2073
2074 uint64_t ctime = mach_absolute_time();
2075
2076#ifdef CONFIG_MACH_APPROXIMATE_TIME
2077 commpage_update_mach_approximate_time(ctime);
2078#endif
2079
2080#if defined(CONFIG_SCHED_TIMESHARE_CORE)
2081 sched_timeshare_consider_maintenance(ctime);
2082#endif
2083
39037602 2084 assert_thread_magic(self);
2d21ac55 2085 assert(self == current_thread());
fe8ab488 2086 assert(self->runq == PROCESSOR_NULL);
3e170ce0 2087 assert((self->state & (TH_RUN|TH_TERMINATE2)) == TH_RUN);
91447636 2088
2d21ac55 2089 thread_lock(thread);
1c79356b 2090
39037602 2091 assert_thread_magic(thread);
3e170ce0 2092 assert((thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_TERMINATE|TH_TERMINATE2)) == TH_RUN);
fe8ab488
A
2093 assert(thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == current_processor());
2094 assert(thread->runq == PROCESSOR_NULL);
1c79356b 2095
316670eb
A
2096 /* Reload precise timing global policy to thread-local policy */
2097 thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
3e170ce0 2098
fe8ab488
A
2099 /* Update SFI class based on other factors */
2100 thread->sfi_class = sfi_thread_classify(thread);
2101
3e170ce0 2102 /* Allow realtime threads to hang onto a stack. */
6d2010ae 2103 if ((self->sched_mode == TH_MODE_REALTIME) && !self->reserved_stack)
2d21ac55 2104 self->reserved_stack = self->kernel_stack;
1c79356b 2105
91447636 2106 if (continuation != NULL) {
2d21ac55 2107 if (!thread->kernel_stack) {
9bccf70c 2108 /*
2d21ac55 2109 * If we are using a privileged stack,
9bccf70c 2110 * check to see whether we can exchange it with
2d21ac55 2111 * that of the other thread.
9bccf70c 2112 */
2d21ac55 2113 if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack)
9bccf70c 2114 goto need_stack;
1c79356b 2115
91447636
A
2116 /*
2117 * Context switch by performing a stack handoff.
2118 */
2d21ac55
A
2119 continuation = thread->continuation;
2120 parameter = thread->parameter;
1c79356b 2121
9bccf70c 2122 processor = current_processor();
2d21ac55
A
2123 processor->active_thread = thread;
2124 processor->current_pri = thread->sched_pri;
6d2010ae 2125 processor->current_thmode = thread->sched_mode;
fe8ab488 2126 processor->current_sfi_class = thread->sfi_class;
2d21ac55
A
2127 if (thread->last_processor != processor && thread->last_processor != NULL) {
2128 if (thread->last_processor->processor_set != processor->processor_set)
2129 thread->ps_switch++;
2130 thread->p_switch++;
2131 }
2132 thread->last_processor = processor;
2133 thread->c_switch++;
2134 ast_context(thread);
3e170ce0 2135
2d21ac55 2136 thread_unlock(thread);
1c79356b 2137
2d21ac55 2138 self->reason = reason;
91447636 2139
39236c6e
A
2140 processor->last_dispatch = ctime;
2141 self->last_run_time = ctime;
2142 thread_timer_event(ctime, &thread->system_timer);
2d21ac55 2143 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
316670eb
A
2144
2145 /*
2146 * Since non-precise user/kernel time doesn't update the state timer
2147 * during privilege transitions, synthesize an event now.
2148 */
2149 if (!thread->precise_user_kernel_time) {
2150 timer_switch(PROCESSOR_DATA(processor, current_state),
39236c6e 2151 ctime,
316670eb
A
2152 PROCESSOR_DATA(processor, current_state));
2153 }
2d21ac55 2154
316670eb
A
2155 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2156 MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
2157 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
1c79356b 2158
39236c6e 2159 if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) {
3e170ce0 2160 SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
6d2010ae
A
2161 (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
2162 }
2163
b0d623f7
A
2164 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
2165
6d2010ae
A
2166 SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri);
2167
2168 TLOG(1, "thread_invoke: calling stack_handoff\n");
2169 stack_handoff(self, thread);
9bccf70c 2170
3e170ce0
A
2171 /* 'self' is now off core */
2172 assert(thread == current_thread());
2173
b0d623f7
A
2174 DTRACE_SCHED(on__cpu);
2175
39037602
A
2176#if KPERF
2177 kperf_on_cpu(thread, continuation, NULL);
2178#endif /* KPERF */
2179
2d21ac55 2180 thread_dispatch(self, thread);
1c79356b 2181
2d21ac55 2182 thread->continuation = thread->parameter = NULL;
1c79356b 2183
2d21ac55 2184 counter(c_thread_invoke_hits++);
1c79356b 2185
9bccf70c 2186 (void) spllo();
1c79356b 2187
2d21ac55
A
2188 assert(continuation);
2189 call_continuation(continuation, parameter, thread->wait_result);
9bccf70c 2190 /*NOTREACHED*/
9bccf70c 2191 }
2d21ac55 2192 else if (thread == self) {
9bccf70c 2193 /* same thread but with continuation */
2d21ac55 2194 ast_context(self);
9bccf70c 2195 counter(++c_thread_invoke_same);
3e170ce0 2196
2d21ac55 2197 thread_unlock(self);
9bccf70c 2198
39037602
A
2199#if KPERF
2200 kperf_on_cpu(thread, continuation, NULL);
2201#endif /* KPERF */
2202
316670eb
A
2203 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2204 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2205 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
6d2010ae 2206
2d21ac55
A
2207 self->continuation = self->parameter = NULL;
2208
9bccf70c 2209 (void) spllo();
55e303ae 2210
2d21ac55 2211 call_continuation(continuation, parameter, self->wait_result);
9bccf70c
A
2212 /*NOTREACHED*/
2213 }
3e170ce0 2214 } else {
9bccf70c 2215 /*
2d21ac55 2216 * Check that the other thread has a stack
9bccf70c 2217 */
2d21ac55 2218 if (!thread->kernel_stack) {
9bccf70c 2219need_stack:
2d21ac55
A
2220 if (!stack_alloc_try(thread)) {
2221 counter(c_thread_invoke_misses++);
2222 thread_unlock(thread);
2223 thread_stack_enqueue(thread);
9bccf70c
A
2224 return (FALSE);
2225 }
3e170ce0 2226 } else if (thread == self) {
2d21ac55 2227 ast_context(self);
9bccf70c 2228 counter(++c_thread_invoke_same);
2d21ac55 2229 thread_unlock(self);
6d2010ae 2230
316670eb
A
2231 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2232 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2233 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
6d2010ae 2234
9bccf70c
A
2235 return (TRUE);
2236 }
2237 }
1c79356b
A
2238
2239 /*
91447636 2240 * Context switch by full context save.
1c79356b 2241 */
9bccf70c 2242 processor = current_processor();
2d21ac55
A
2243 processor->active_thread = thread;
2244 processor->current_pri = thread->sched_pri;
6d2010ae 2245 processor->current_thmode = thread->sched_mode;
fe8ab488 2246 processor->current_sfi_class = thread->sfi_class;
2d21ac55
A
2247 if (thread->last_processor != processor && thread->last_processor != NULL) {
2248 if (thread->last_processor->processor_set != processor->processor_set)
2249 thread->ps_switch++;
2250 thread->p_switch++;
2251 }
2252 thread->last_processor = processor;
2253 thread->c_switch++;
2254 ast_context(thread);
3e170ce0 2255
2d21ac55 2256 thread_unlock(thread);
1c79356b 2257
2d21ac55 2258 counter(c_thread_invoke_csw++);
1c79356b 2259
2d21ac55 2260 self->reason = reason;
1c79356b 2261
39236c6e
A
2262 processor->last_dispatch = ctime;
2263 self->last_run_time = ctime;
2264 thread_timer_event(ctime, &thread->system_timer);
2d21ac55 2265 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
91447636 2266
316670eb
A
2267 /*
2268 * Since non-precise user/kernel time doesn't update the state timer
2269 * during privilege transitions, synthesize an event now.
2270 */
2271 if (!thread->precise_user_kernel_time) {
2272 timer_switch(PROCESSOR_DATA(processor, current_state),
39236c6e 2273 ctime,
316670eb
A
2274 PROCESSOR_DATA(processor, current_state));
2275 }
3e170ce0 2276
316670eb
A
2277 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2278 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2279 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
b0d623f7 2280
6d2010ae 2281 if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) {
3e170ce0 2282 SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
6d2010ae
A
2283 (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
2284 }
2285
b0d623f7 2286 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
1c79356b 2287
6d2010ae
A
2288 SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri);
2289
1c79356b 2290 /*
91447636 2291 * This is where we actually switch register context,
2d21ac55
A
2292 * and address space if required. We will next run
2293 * as a result of a subsequent context switch.
3e170ce0
A
2294 *
2295 * Once registers are switched and the processor is running "thread",
2296 * the stack variables and non-volatile registers will contain whatever
2297 * was there the last time that thread blocked. No local variables should
2298 * be used after this point, except for the special case of "thread", which
2299 * the platform layer returns as the previous thread running on the processor
2300 * via the function call ABI as a return register, and "self", which may have
2301 * been stored on the stack or in a non-volatile register; although that
2302 * saved value was stale while the thread was off core, it is accurate again
2303 * because that thread is once more running on the CPU.
91447636 2304 */
316670eb 2305 assert(continuation == self->continuation);
2d21ac55 2306 thread = machine_switch_context(self, continuation, thread);
316670eb 2307 assert(self == current_thread());
b0d623f7
A
2308 TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread);
2309
2310 DTRACE_SCHED(on__cpu);
1c79356b 2311
39037602
A
2312#if KPERF
2313 kperf_on_cpu(self, NULL, __builtin_frame_address(0));
2314#endif /* KPERF */
2315
1c79356b 2316 /*
2d21ac55 2317 * We have been resumed and are set to run.
1c79356b 2318 */
2d21ac55 2319 thread_dispatch(thread, self);
9bccf70c 2320
91447636 2321 if (continuation) {
2d21ac55
A
2322 self->continuation = self->parameter = NULL;
2323
9bccf70c 2324 (void) spllo();
55e303ae 2325
2d21ac55 2326 call_continuation(continuation, parameter, self->wait_result);
9bccf70c 2327 /*NOTREACHED*/
1c79356b
A
2328 }
2329
9bccf70c 2330 return (TRUE);
1c79356b
A
2331}
2332
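/*
 * The three switch paths taken above, in brief:
 *   - self is blocking at a continuation and the new thread has no kernel
 *     stack: stack_handoff() donates the current stack and the new thread's
 *     continuation is called directly (the cheapest path);
 *   - the "new" thread is self with a continuation: no switch at all, just
 *     call_continuation();
 *   - otherwise: machine_switch_context() performs a full register (and, if
 *     needed, address-space) switch; we return here later as the resumed
 *     thread and finish the previous thread's bookkeeping in thread_dispatch().
 */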
3e170ce0
A
2333#if defined(CONFIG_SCHED_DEFERRED_AST)
2334/*
2335 * pset_cancel_deferred_dispatch:
2336 *
2337 * Cancels all ASTs that we can cancel for the given processor set
2338 * if the current processor is running the last runnable thread in the
2339 * system.
2340 *
2341 * This function assumes the current thread is runnable. This must
2342 * be called with the pset unlocked.
2343 */
2344static void
2345pset_cancel_deferred_dispatch(
2346 processor_set_t pset,
2347 processor_t processor)
2348{
2349 processor_t active_processor = NULL;
2350 uint32_t sampled_sched_run_count;
2351
2352 pset_lock(pset);
39037602 2353 sampled_sched_run_count = (volatile uint32_t) sched_run_buckets[TH_BUCKET_RUN];
3e170ce0
A
2354
2355 /*
2356 * If we have emptied the run queue, and our current thread is runnable, we
2357 * should tell any processors that are still DISPATCHING that they will
2358 * probably not have any work to do. In the event that there are no
2359 * pending signals that we can cancel, this is also uninteresting.
2360 *
2361 * In the unlikely event that another thread becomes runnable while we are
2362 * doing this (sched_run_count is atomically updated, not guarded), the
2363 * codepath making it runnable SHOULD (a dangerous word) need the pset lock
2364 * in order to dispatch it to a processor in our pset. So, the other
2365 * codepath will wait while we squash all cancelable ASTs, get the pset
2366 * lock, and then dispatch the freshly runnable thread. So this should be
2367 * correct (we won't accidentally have a runnable thread that hasn't been
2368 * dispatched to an idle processor), if not ideal (we may be restarting the
2369 * dispatch process, which could have some overhead).
2370 *
2371 */
2372 if ((sampled_sched_run_count == 1) &&
2373 (pset->pending_deferred_AST_cpu_mask)) {
2374 qe_foreach_element_safe(active_processor, &pset->active_queue, processor_queue) {
2375 /*
2376 * If a processor is DISPATCHING, it could be because of
2377 * a cancelable signal.
2378 *
2379 * IF the processor is not our
2380 * current processor (the current processor should not
2381 * be DISPATCHING, so this is a bit paranoid), AND there
2382 * is a cancelable signal pending on the processor, AND
2383 * there is no non-cancelable signal pending (as there is
2384 * no point trying to backtrack on bringing the processor
2385 * up if a signal we cannot cancel is outstanding), THEN
2386 * it should make sense to roll back the processor state
2387 * to the IDLE state.
2388 *
2389 * If the racy nature of this approach (as the signal
2390 * will be arbitrated by hardware, and can fire as we
2391 * roll back state) results in the core responding
2392 * despite being pushed back to the IDLE state, it
2393 * should be no different than if the core took some
2394 * interrupt while IDLE.
2395 */
2396 if ((active_processor->state == PROCESSOR_DISPATCHING) &&
2397 (pset->pending_deferred_AST_cpu_mask & (1ULL << active_processor->cpu_id)) &&
2398 (!(pset->pending_AST_cpu_mask & (1ULL << active_processor->cpu_id))) &&
2399 (active_processor != processor)) {
2400 /*
2401 * Squash all of the processor state back to some
2402 * reasonable facsimile of PROCESSOR_IDLE.
2403 *
2404 * TODO: What queue policy do we actually want here?
2405 * We want to promote selection of a good processor
2406 * to run on. Do we want to enqueue at the head?
2407 * The tail? At the (relative) old position in the
2408 * queue? Or something else entirely?
2409 */
39037602 2410 re_queue_head(&pset->idle_queue, &active_processor->processor_queue);
3e170ce0
A
2411
2412 assert(active_processor->next_thread == THREAD_NULL);
2413
2414 active_processor->current_pri = IDLEPRI;
2415 active_processor->current_thmode = TH_MODE_FIXED;
2416 active_processor->current_sfi_class = SFI_CLASS_KERNEL;
2417 active_processor->deadline = UINT64_MAX;
2418 active_processor->state = PROCESSOR_IDLE;
2419 pset->pending_deferred_AST_cpu_mask &= ~(1U << active_processor->cpu_id);
2420 machine_signal_idle_cancel(active_processor);
2421 }
2422
2423 }
2424 }
2425
2426 pset_unlock(pset);
2427}
2428#else
2429/* We don't support deferred ASTs; everything is candycanes and sunshine. */
2430#endif
2431
1c79356b 2432/*
2d21ac55 2433 * thread_dispatch:
1c79356b 2434 *
2d21ac55
A
2435 * Handle threads at context switch. Re-dispatch other thread
2436 * if still running, otherwise update run state and perform
2437 * special actions. Update quantum for other thread and begin
2438 * the quantum for ourselves.
91447636 2439 *
3e170ce0
A
2440 * "thread" is the old thread that we have switched away from.
2441 * "self" is the new current thread that we have context switched to
39236c6e 2442 *
91447636 2443 * Called at splsched.
1c79356b
A
2444 */
2445void
2d21ac55
A
2446thread_dispatch(
2447 thread_t thread,
2448 thread_t self)
1c79356b 2449{
2d21ac55
A
2450 processor_t processor = self->last_processor;
2451
3e170ce0
A
2452 assert(processor == current_processor());
2453 assert(self == current_thread());
2454 assert(thread != self);
2455
2d21ac55 2456 if (thread != THREAD_NULL) {
91447636 2457 /*
2d21ac55
A
2458 * If blocked at a continuation, discard
2459 * the stack.
91447636 2460 */
2d21ac55
A
2461 if (thread->continuation != NULL && thread->kernel_stack != 0)
2462 stack_free(thread);
2463
3e170ce0
A
2464 if (thread->state & TH_IDLE) {
2465 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
39037602
A
2466 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
2467 (uintptr_t)thread_tid(thread), 0, thread->state,
2468 sched_run_buckets[TH_BUCKET_RUN], 0);
3e170ce0 2469 } else {
316670eb
A
2470 int64_t consumed;
2471 int64_t remainder = 0;
2472
2473 if (processor->quantum_end > processor->last_dispatch)
2474 remainder = processor->quantum_end -
2475 processor->last_dispatch;
2476
fe8ab488 2477 consumed = thread->quantum_remaining - remainder;
316670eb 2478
39236c6e 2479 if ((thread->reason & AST_LEDGER) == 0) {
316670eb 2480 /*
39236c6e
A
2481 * Bill CPU time to both the task and
2482 * the individual thread.
316670eb
A
2483 */
2484 ledger_credit(thread->t_ledger,
2485 task_ledgers.cpu_time, consumed);
2486 ledger_credit(thread->t_threadledger,
2487 thread_ledgers.cpu_time, consumed);
fe8ab488
A
2488#ifdef CONFIG_BANK
2489 if (thread->t_bankledger) {
2490 ledger_credit(thread->t_bankledger,
2491 bank_ledgers.cpu_time,
2492 (consumed - thread->t_deduct_bank_ledger_time));
2493
2494 }
2495 thread->t_deduct_bank_ledger_time = 0;
2496#endif
39236c6e 2497 }
316670eb 2498
2d21ac55
A
2499 wake_lock(thread);
2500 thread_lock(thread);
9bccf70c 2501
91447636 2502 /*
39037602
A
2503 * Apply a priority floor if the thread holds a kernel resource
2504 * Do this before checking starting_pri to avoid overpenalizing
2505 * repeated rwlock blockers.
2506 */
2507 if (__improbable(thread->rwlock_count != 0))
2508 lck_rw_set_promotion_locked(thread);
2509
2510 boolean_t keep_quantum = processor->first_timeslice;
2511
2512 /*
2513 * Treat a thread which has dropped priority since it got on core
2514 * as having expired its quantum.
91447636 2515 */
39037602
A
2516 if (processor->starting_pri > thread->sched_pri)
2517 keep_quantum = FALSE;
2518
2519 /* Compute remainder of current quantum. */
2520 if (keep_quantum &&
316670eb 2521 processor->quantum_end > processor->last_dispatch)
fe8ab488 2522 thread->quantum_remaining = (uint32_t)remainder;
2d21ac55 2523 else
fe8ab488 2524 thread->quantum_remaining = 0;
2d21ac55 2525
6d2010ae 2526 if (thread->sched_mode == TH_MODE_REALTIME) {
2d21ac55
A
2527 /*
2528 * Cancel the deadline if the thread has
2529 * consumed the entire quantum.
2530 */
fe8ab488 2531 if (thread->quantum_remaining == 0) {
2d21ac55 2532 thread->realtime.deadline = UINT64_MAX;
2d21ac55 2533 }
b7266188 2534 } else {
3e170ce0 2535#if defined(CONFIG_SCHED_TIMESHARE_CORE)
2d21ac55
A
2536 /*
2537 * For non-realtime threads treat a tiny
2538 * remaining quantum as an expired quantum
2539 * but include what's left next time.
2540 */
fe8ab488 2541 if (thread->quantum_remaining < min_std_quantum) {
2d21ac55 2542 thread->reason |= AST_QUANTUM;
fe8ab488 2543 thread->quantum_remaining += SCHED(initial_quantum_size)(thread);
2d21ac55 2544 }
3e170ce0 2545#endif /* CONFIG_SCHED_TIMESHARE_CORE */
2d21ac55
A
2546 }
2547
91447636 2548 /*
2d21ac55
A
2549 * If we are doing a direct handoff then
2550 * take the remainder of the quantum.
91447636 2551 */
2d21ac55 2552 if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
fe8ab488 2553 self->quantum_remaining = thread->quantum_remaining;
2d21ac55 2554 thread->reason |= AST_QUANTUM;
fe8ab488
A
2555 thread->quantum_remaining = 0;
2556 } else {
2557#if defined(CONFIG_SCHED_MULTIQ)
3e170ce0
A
2558 if (SCHED(sched_groups_enabled) &&
2559 thread->sched_group == self->sched_group) {
fe8ab488 2560 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3e170ce0 2561 MACHDBG_CODE(DBG_MACH_SCHED, MACH_QUANTUM_HANDOFF),
fe8ab488
A
2562 self->reason, (uintptr_t)thread_tid(thread),
2563 self->quantum_remaining, thread->quantum_remaining, 0);
2564
2565 self->quantum_remaining = thread->quantum_remaining;
2566 thread->quantum_remaining = 0;
3e170ce0 2567 /* Don't set AST_QUANTUM here - old thread might still want to preempt someone else */
fe8ab488
A
2568 }
2569#endif /* defined(CONFIG_SCHED_MULTIQ) */
91447636 2570 }
91447636 2571
b0d623f7 2572 thread->computation_metered += (processor->last_dispatch - thread->computation_epoch);
2d21ac55
A
2573
2574 if (!(thread->state & TH_WAIT)) {
2575 /*
3e170ce0 2576 * Still runnable.
2d21ac55 2577 */
3e170ce0
A
2578 thread->last_made_runnable_time = mach_approximate_time();
2579
d190cdc3 2580 machine_thread_going_off_core(thread, FALSE, processor->last_dispatch);
3e170ce0 2581
2d21ac55
A
2582 if (thread->reason & AST_QUANTUM)
2583 thread_setrun(thread, SCHED_TAILQ);
3e170ce0 2584 else if (thread->reason & AST_PREEMPT)
2d21ac55
A
2585 thread_setrun(thread, SCHED_HEADQ);
2586 else
2587 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2588
fe8ab488 2589 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
39037602
A
2590 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
2591 (uintptr_t)thread_tid(thread), thread->reason, thread->state,
2592 sched_run_buckets[TH_BUCKET_RUN], 0);
3e170ce0 2593
316670eb
A
2594 if (thread->wake_active) {
2595 thread->wake_active = FALSE;
2596 thread_unlock(thread);
2597
2598 thread_wakeup(&thread->wake_active);
3e170ce0 2599 } else {
316670eb 2600 thread_unlock(thread);
3e170ce0 2601 }
316670eb 2602
2d21ac55 2603 wake_unlock(thread);
3e170ce0 2604 } else {
2d21ac55
A
2605 /*
2606 * Waiting.
2607 */
b7266188 2608 boolean_t should_terminate = FALSE;
fe8ab488 2609 uint32_t new_run_count;
b7266188
A
2610
2611 /* Only the first call to thread_dispatch
2612 * after explicit termination should add
2613 * the thread to the termination queue
2614 */
2615 if ((thread->state & (TH_TERMINATE|TH_TERMINATE2)) == TH_TERMINATE) {
2616 should_terminate = TRUE;
2617 thread->state |= TH_TERMINATE2;
2618 }
2619
2d21ac55 2620 thread->state &= ~TH_RUN;
3e170ce0 2621 thread->last_made_runnable_time = ~0ULL;
39236c6e
A
2622 thread->chosen_processor = PROCESSOR_NULL;
2623
fe8ab488 2624 new_run_count = sched_run_decr(thread);
2d21ac55 2625
3e170ce0 2626#if CONFIG_SCHED_SFI
fe8ab488
A
2627 if ((thread->state & (TH_WAIT | TH_TERMINATE)) == TH_WAIT) {
2628 if (thread->reason & AST_SFI) {
2629 thread->wait_sfi_begin_time = processor->last_dispatch;
2630 }
39236c6e 2631 }
3e170ce0
A
2632#endif
2633
d190cdc3 2634 machine_thread_going_off_core(thread, should_terminate, processor->last_dispatch);
fe8ab488
A
2635
2636 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
39037602
A
2637 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
2638 (uintptr_t)thread_tid(thread), thread->reason, thread->state,
2639 new_run_count, 0);
2d21ac55 2640
b7266188
A
2641 (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
2642
2d21ac55
A
2643 if (thread->wake_active) {
2644 thread->wake_active = FALSE;
2645 thread_unlock(thread);
2646
2647 thread_wakeup(&thread->wake_active);
3e170ce0 2648 } else {
2d21ac55 2649 thread_unlock(thread);
3e170ce0 2650 }
91447636 2651
2d21ac55 2652 wake_unlock(thread);
91447636 2653
b7266188 2654 if (should_terminate)
2d21ac55
A
2655 thread_terminate_enqueue(thread);
2656 }
2657 }
91447636 2658 }
91447636 2659
3e170ce0
A
2660 /* Update (new) current thread and reprogram quantum timer */
2661 thread_lock(self);
2d21ac55 2662 if (!(self->state & TH_IDLE)) {
39236c6e
A
2663 uint64_t arg1, arg2;
2664 int urgency;
3e170ce0
A
2665 uint64_t latency;
2666
2667#if CONFIG_SCHED_SFI
fe8ab488
A
2668 ast_t new_ast;
2669
fe8ab488 2670 new_ast = sfi_thread_needs_ast(self, NULL);
fe8ab488
A
2671
2672 if (new_ast != AST_NONE) {
2673 ast_on(new_ast);
2674 }
3e170ce0
A
2675#endif
2676
39037602 2677 assertf(processor->last_dispatch >= self->last_made_runnable_time, "Non-monotonic time? dispatch at 0x%llx, runnable at 0x%llx", processor->last_dispatch, self->last_made_runnable_time);
3e170ce0 2678 latency = processor->last_dispatch - self->last_made_runnable_time;
6d2010ae 2679
39236c6e
A
2680 urgency = thread_get_urgency(self, &arg1, &arg2);
2681
3e170ce0
A
2682 thread_tell_urgency(urgency, arg1, arg2, latency, self);
2683
d190cdc3 2684 machine_thread_going_on_core(self, urgency, latency, processor->last_dispatch);
39236c6e 2685
91447636 2686 /*
2d21ac55 2687 * Get a new quantum if none remaining.
91447636 2688 */
fe8ab488 2689 if (self->quantum_remaining == 0) {
2d21ac55 2690 thread_quantum_init(self);
6d2010ae 2691 }
91447636
A
2692
2693 /*
2d21ac55 2694 * Set up quantum timer and timeslice.
91447636 2695 */
fe8ab488
A
2696 processor->quantum_end = processor->last_dispatch + self->quantum_remaining;
2697 timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
91447636 2698
3e170ce0
A
2699 processor->first_timeslice = TRUE;
2700 } else {
2701 timer_call_cancel(&processor->quantum_timer);
2702 processor->first_timeslice = FALSE;
91447636 2703
3e170ce0 2704 thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, 0, self);
d190cdc3 2705 machine_thread_going_on_core(self, THREAD_URGENCY_NONE, 0, processor->last_dispatch);
91447636 2706 }
6d2010ae 2707
813fb2f6 2708 assert(self->block_hint == kThreadWaitNone);
3e170ce0
A
2709 self->computation_epoch = processor->last_dispatch;
2710 self->reason = AST_NONE;
39037602 2711 processor->starting_pri = self->sched_pri;
3e170ce0
A
2712
2713 thread_unlock(self);
2714
2715#if defined(CONFIG_SCHED_DEFERRED_AST)
2716 /*
2717 * TODO: Can we state that redispatching our old thread is also
2718 * uninteresting?
2719 */
39037602 2720 if ((((volatile uint32_t)sched_run_buckets[TH_BUCKET_RUN]) == 1) &&
3e170ce0
A
2721 !(self->state & TH_IDLE)) {
2722 pset_cancel_deferred_dispatch(processor->processor_set, processor);
91447636 2723 }
3e170ce0
A
2724#endif
2725
91447636
A
2726}
2727
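/*
 * Worked example of the quantum accounting above (illustrative numbers): if
 * the outgoing thread started its timeslice with 10 ms of quantum and goes
 * off core with 4 ms left before quantum_end, then consumed = 6 ms is
 * credited to the task and thread CPU-time ledgers.  A still-runnable thread
 * that has not dropped below the priority it came on core with keeps the
 * 4 ms remainder; a timeshare thread whose remainder is smaller than
 * min_std_quantum is treated as expired and has the sliver added to a fresh
 * quantum; a thread that dropped priority while on core forfeits the
 * remainder entirely.
 */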
2728/*
2d21ac55 2729 * thread_block_reason:
91447636 2730 *
2d21ac55
A
2731 * Forces a reschedule, blocking the caller if a wait
2732 * has been asserted.
91447636 2733 *
2d21ac55
A
2734 * If a continuation is specified, then thread_invoke will
2735 * attempt to discard the thread's kernel stack. When the
2736 * thread resumes, it will execute the continuation function
2737 * on a new kernel stack.
91447636 2738 */
2d21ac55
A
2739counter(mach_counter_t c_thread_block_calls = 0;)
2740
2741wait_result_t
2742thread_block_reason(
2743 thread_continue_t continuation,
2744 void *parameter,
2745 ast_t reason)
91447636 2746{
3e170ce0
A
2747 thread_t self = current_thread();
2748 processor_t processor;
2749 thread_t new_thread;
2750 spl_t s;
1c79356b
A
2751
2752 counter(++c_thread_block_calls);
2753
1c79356b
A
2754 s = splsched();
2755
55e303ae 2756 processor = current_processor();
1c79356b 2757
9bccf70c
A
2758 /* If we're explicitly yielding, force a subsequent quantum */
2759 if (reason & AST_YIELD)
3e170ce0 2760 processor->first_timeslice = FALSE;
0b4e3aa0 2761
9bccf70c
A
2762 /* We're handling all scheduling AST's */
2763 ast_off(AST_SCHEDULING);
1c79356b 2764
490019cf
A
2765#if PROC_REF_DEBUG
2766 if ((continuation != NULL) && (self->task != kernel_task)) {
2767 if (uthread_get_proc_refcount(self->uthread) != 0) {
2768 panic("thread_block_reason with continuation uthread %p with uu_proc_refcount != 0", self->uthread);
2769 }
2770 }
2771#endif
2772
91447636
A
2773 self->continuation = continuation;
2774 self->parameter = parameter;
2775
fe8ab488 2776 if (self->state & ~(TH_RUN | TH_IDLE)) {
316670eb
A
2777 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2778 MACHDBG_CODE(DBG_MACH_SCHED,MACH_BLOCK),
2779 reason, VM_KERNEL_UNSLIDE(continuation), 0, 0, 0);
b0d623f7
A
2780 }
2781
2d21ac55 2782 do {
91447636 2783 thread_lock(self);
fe8ab488 2784 new_thread = thread_select(self, processor, reason);
91447636 2785 thread_unlock(self);
2d21ac55 2786 } while (!thread_invoke(self, new_thread, reason));
1c79356b 2787
1c79356b
A
2788 splx(s);
2789
91447636 2790 return (self->wait_result);
1c79356b
A
2791}
2792
2793/*
2794 * thread_block:
2795 *
9bccf70c 2796 * Block the current thread if a wait has been asserted.
1c79356b 2797 */
91447636 2798wait_result_t
1c79356b 2799thread_block(
9bccf70c 2800 thread_continue_t continuation)
1c79356b 2801{
91447636
A
2802 return thread_block_reason(continuation, NULL, AST_NONE);
2803}
2804
2805wait_result_t
2806thread_block_parameter(
2807 thread_continue_t continuation,
2808 void *parameter)
2809{
2810 return thread_block_reason(continuation, parameter, AST_NONE);
1c79356b
A
2811}
2812
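/*
 * Typical usage sketch (the object and continuation names below are
 * hypothetical, not part of this file):
 *
 *	assert_wait((event_t)&my_object->pending, THREAD_UNINT);
 *	my_object_unlock(my_object);
 *	thread_block(my_object_continue);	    does not return if a switch
 *						    occurs; resumes in
 *						    my_object_continue()
 *
 * or, keeping the kernel stack:
 *
 *	assert_wait((event_t)&my_object->pending, THREAD_UNINT);
 *	my_object_unlock(my_object);
 *	wait_result_t wr = thread_block(THREAD_CONTINUE_NULL);
 *
 * With a non-NULL continuation the kernel stack may be discarded while the
 * thread is blocked, so no locals survive across the block.
 */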
2813/*
2814 * thread_run:
2815 *
91447636 2816 * Switch directly from the current thread to the
55e303ae 2817 * new thread, handing off our quantum if appropriate.
9bccf70c
A
2818 *
2819 * New thread must be runnable, and not on a run queue.
1c79356b 2820 *
55e303ae 2821 * Called at splsched.
1c79356b
A
2822 */
2823int
2824thread_run(
91447636 2825 thread_t self,
9bccf70c 2826 thread_continue_t continuation,
91447636 2827 void *parameter,
9bccf70c 2828 thread_t new_thread)
1c79356b 2829{
9bccf70c
A
2830 ast_t handoff = AST_HANDOFF;
2831
91447636
A
2832 self->continuation = continuation;
2833 self->parameter = parameter;
9bccf70c 2834
91447636 2835 while (!thread_invoke(self, new_thread, handoff)) {
2d21ac55 2836 processor_t processor = current_processor();
9bccf70c 2837
91447636 2838 thread_lock(self);
fe8ab488 2839 new_thread = thread_select(self, processor, AST_NONE);
91447636 2840 thread_unlock(self);
9bccf70c
A
2841 handoff = AST_NONE;
2842 }
2843
91447636 2844 return (self->wait_result);
1c79356b
A
2845}
2846
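/*
 * Usage sketch (hypothetical caller): thread_run() hands the remainder of
 * the current quantum directly to a chosen thread instead of letting
 * thread_select() pick one.  At splsched(), with `target` runnable and not
 * on any run queue:
 *
 *	thread_run(self, THREAD_CONTINUE_NULL, NULL, target);
 *
 * If the handoff cannot be completed, thread_run() falls back to a normal
 * thread_select()/thread_invoke() loop, so the caller still blocks.
 */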
2847/*
91447636 2848 * thread_continue:
55e303ae 2849 *
91447636
A
2850 * Called at splsched when a thread first receives
2851 * a new stack after a continuation.
1c79356b
A
2852 */
2853void
91447636 2854thread_continue(
3e170ce0 2855 thread_t thread)
1c79356b 2856{
3e170ce0
A
2857 thread_t self = current_thread();
2858 thread_continue_t continuation;
2859 void *parameter;
b0d623f7
A
2860
2861 DTRACE_SCHED(on__cpu);
2862
91447636 2863 continuation = self->continuation;
91447636 2864 parameter = self->parameter;
9bccf70c 2865
39037602
A
2866#if KPERF
2867 kperf_on_cpu(self, continuation, NULL);
2868#endif
2869
2d21ac55 2870 thread_dispatch(thread, self);
9bccf70c 2871
2d21ac55 2872 self->continuation = self->parameter = NULL;
1c79356b 2873
2d21ac55 2874 if (thread != THREAD_NULL)
91447636 2875 (void)spllo();
9bccf70c 2876
2d21ac55 2877 TLOG(1, "thread_continue: calling call_continuation \n");
91447636
A
2878 call_continuation(continuation, parameter, self->wait_result);
2879 /*NOTREACHED*/
1c79356b
A
2880}
2881
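/*
 * For reference, a continuation has this shape (hypothetical example):
 *
 *	static void
 *	my_object_continue(void *parameter, wait_result_t wresult)
 *	{
 *		...
 *		thread_block(my_object_continue);   or otherwise never return
 *	}
 *
 * thread_continue() above is the first code that runs when such a thread is
 * given a new stack, which is why it must finish dispatching the previous
 * thread and drop to spllo() before jumping into the continuation.
 */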
2d21ac55 2882void
6d2010ae 2883thread_quantum_init(thread_t thread)
2d21ac55 2884{
6d2010ae 2885 if (thread->sched_mode == TH_MODE_REALTIME) {
fe8ab488 2886 thread->quantum_remaining = thread->realtime.computation;
6d2010ae 2887 } else {
fe8ab488 2888 thread->quantum_remaining = SCHED(initial_quantum_size)(thread);
6d2010ae
A
2889 }
2890}
2d21ac55 2891
fe8ab488 2892uint32_t
3e170ce0 2893sched_timeshare_initial_quantum_size(thread_t thread)
6d2010ae 2894{
39037602 2895 if ((thread != THREAD_NULL) && thread->th_sched_bucket == TH_BUCKET_SHARE_BG)
316670eb 2896 return bg_quantum;
39037602
A
2897 else
2898 return std_quantum;
6d2010ae
A
2899}
2900
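/*
 * std_quantum and bg_quantum are computed at boot from the configured
 * preemption rate (nominally on the order of a 10 ms timeslice for standard
 * timeshare threads), with background-bucket threads receiving the separate,
 * typically shorter bg_quantum.  Realtime threads bypass this entirely and
 * use their declared computation time as the quantum (see
 * thread_quantum_init() above).
 */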
6d2010ae
A
2901/*
2902 * run_queue_init:
2903 *
2904 * Initialize a run queue before first use.
2905 */
2906void
2907run_queue_init(
2908 run_queue_t rq)
2909{
39037602
A
2910 rq->highq = NOPRI;
2911 for (u_int i = 0; i < BITMAP_LEN(NRQS); i++)
2d21ac55 2912 rq->bitmap[i] = 0;
2d21ac55 2913 rq->urgency = rq->count = 0;
39037602 2914 for (int i = 0; i < NRQS; i++)
2d21ac55
A
2915 queue_init(&rq->queues[i]);
2916}
1c79356b 2917
2d21ac55
A
2918/*
2919 * run_queue_dequeue:
2920 *
2921 * Perform a dequeue operation on a run queue,
2922 * and return the resulting thread.
2923 *
6d2010ae 2924 * The run queue must be locked (see thread_run_queue_remove()
2d21ac55
A
2925 * for more info), and not empty.
2926 */
6d2010ae 2927thread_t
2d21ac55 2928run_queue_dequeue(
39037602
A
2929 run_queue_t rq,
2930 integer_t options)
2d21ac55 2931{
39037602
A
2932 thread_t thread;
2933 queue_t queue = &rq->queues[rq->highq];
9bccf70c 2934
2d21ac55 2935 if (options & SCHED_HEADQ) {
39037602
A
2936 thread = qe_dequeue_head(queue, struct thread, runq_links);
2937 } else {
2938 thread = qe_dequeue_tail(queue, struct thread, runq_links);
9bccf70c 2939 }
1c79356b 2940
39037602
A
2941 assert(thread != THREAD_NULL);
2942 assert_thread_magic(thread);
2943
2d21ac55 2944 thread->runq = PROCESSOR_NULL;
6d2010ae 2945 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
2d21ac55 2946 rq->count--;
6d2010ae 2947 if (SCHED(priority_is_urgent)(rq->highq)) {
4a3eedf9
A
2948 rq->urgency--; assert(rq->urgency >= 0);
2949 }
2d21ac55 2950 if (queue_empty(queue)) {
39037602
A
2951 bitmap_clear(rq->bitmap, rq->highq);
2952 rq->highq = bitmap_first(rq->bitmap, NRQS);
2d21ac55 2953 }
1c79356b 2954
39037602 2955 return thread;
1c79356b
A
2956}
2957
6d2010ae
A
2958/*
2959 * run_queue_enqueue:
2960 *
2961 * Perform an enqueue operation on a run queue.
2962 *
2963 * The run queue must be locked (see thread_run_queue_remove()
2964 * for more info).
2965 */
2966boolean_t
2967run_queue_enqueue(
39037602
A
2968 run_queue_t rq,
2969 thread_t thread,
2970 integer_t options)
6d2010ae 2971{
39037602
A
2972 queue_t queue = &rq->queues[thread->sched_pri];
2973 boolean_t result = FALSE;
2974
2975 assert_thread_magic(thread);
2976
6d2010ae 2977 if (queue_empty(queue)) {
39037602
A
2978 enqueue_tail(queue, &thread->runq_links);
2979
2980 rq_bitmap_set(rq->bitmap, thread->sched_pri);
6d2010ae
A
2981 if (thread->sched_pri > rq->highq) {
2982 rq->highq = thread->sched_pri;
2983 result = TRUE;
2984 }
fe8ab488 2985 } else {
6d2010ae 2986 if (options & SCHED_TAILQ)
39037602 2987 enqueue_tail(queue, &thread->runq_links);
6d2010ae 2988 else
39037602 2989 enqueue_head(queue, &thread->runq_links);
fe8ab488 2990 }
6d2010ae
A
2991 if (SCHED(priority_is_urgent)(thread->sched_pri))
2992 rq->urgency++;
2993 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
2994 rq->count++;
39037602 2995
6d2010ae 2996 return (result);
6d2010ae
A
2997}
2998
2999/*
3000 * run_queue_remove:
3001 *
3002 * Remove a specific thread from a runqueue.
3003 *
3004 * The run queue must be locked.
3005 */
3006void
3007run_queue_remove(
39037602
A
3008 run_queue_t rq,
3009 thread_t thread)
6d2010ae 3010{
39037602
A
3011 assert(thread->runq != PROCESSOR_NULL);
3012 assert_thread_magic(thread);
6d2010ae 3013
39037602 3014 remqueue(&thread->runq_links);
6d2010ae
A
3015 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3016 rq->count--;
3017 if (SCHED(priority_is_urgent)(thread->sched_pri)) {
3018 rq->urgency--; assert(rq->urgency >= 0);
3019 }
39037602
A
3020
3021 if (queue_empty(&rq->queues[thread->sched_pri])) {
6d2010ae 3022 /* update run queue status */
39037602
A
3023 bitmap_clear(rq->bitmap, thread->sched_pri);
3024 rq->highq = bitmap_first(rq->bitmap, NRQS);
6d2010ae 3025 }
39037602 3026
6d2010ae
A
3027 thread->runq = PROCESSOR_NULL;
3028}
3029
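/*
 * The bitmap kept alongside the queues above turns "find the highest
 * runnable priority" into a single highest-set-bit lookup instead of a scan
 * over all NRQS queues.  A minimal stand-alone sketch of the idea
 * (hypothetical helper, not the kernel's bitmap_first()):
 *
 *	static int
 *	highest_set_bit(const uint64_t *map, int nbits)
 *	{
 *		for (int w = (nbits - 1) / 64; w >= 0; w--) {
 *			if (map[w] != 0)
 *				return (w * 64) + (63 - __builtin_clzll(map[w]));
 *		}
 *		return -1;	    analogous to NOPRI when nothing is queued
 *	}
 */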
3e170ce0
A
3030/* Assumes RT lock is not held, and acquires splsched/rt_lock itself */
3031void
3032rt_runq_scan(sched_update_scan_context_t scan_context)
6d2010ae 3033{
3e170ce0
A
3034 spl_t s;
3035 thread_t thread;
fe8ab488 3036
3e170ce0
A
3037 s = splsched();
3038 rt_lock_lock();
6d2010ae 3039
39037602 3040 qe_foreach_element_safe(thread, &rt_runq.queue, runq_links) {
3e170ce0
A
3041 if (thread->last_made_runnable_time < scan_context->earliest_rt_make_runnable_time) {
3042 scan_context->earliest_rt_make_runnable_time = thread->last_made_runnable_time;
3043 }
3044 }
6d2010ae 3045
3e170ce0
A
3046 rt_lock_unlock();
3047 splx(s);
6d2010ae
A
3048}
3049
3e170ce0 3050
1c79356b 3051/*
2d21ac55
A
3052 * realtime_queue_insert:
3053 *
3054 * Enqueue a thread for realtime execution.
1c79356b 3055 */
2d21ac55 3056static boolean_t
39037602 3057realtime_queue_insert(thread_t thread)
1c79356b 3058{
39037602
A
3059 queue_t queue = &rt_runq.queue;
3060 uint64_t deadline = thread->realtime.deadline;
3061 boolean_t preempt = FALSE;
1c79356b 3062
3e170ce0 3063 rt_lock_lock();
1c79356b 3064
55e303ae 3065 if (queue_empty(queue)) {
39037602 3066 enqueue_tail(queue, &thread->runq_links);
2d21ac55 3067 preempt = TRUE;
39037602
A
3068 } else {
3069 /* Insert into rt_runq in thread deadline order */
3070 queue_entry_t iter;
3071 qe_foreach(iter, queue) {
3072 thread_t iter_thread = qe_element(iter, struct thread, runq_links);
3073 assert_thread_magic(iter_thread);
3074
3075 if (deadline < iter_thread->realtime.deadline) {
3076 if (iter == queue_first(queue))
3077 preempt = TRUE;
3078 insque(&thread->runq_links, queue_prev(iter));
3079 break;
3080 } else if (iter == queue_last(queue)) {
3081 enqueue_tail(queue, &thread->runq_links);
55e303ae
A
3082 break;
3083 }
55e303ae 3084 }
55e303ae
A
3085 }
3086
3e170ce0 3087 thread->runq = THREAD_ON_RT_RUNQ;
6d2010ae
A
3088 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
3089 rt_runq.count++;
55e303ae 3090
3e170ce0 3091 rt_lock_unlock();
55e303ae 3092
2d21ac55
A
3093 return (preempt);
3094}
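/*
 * Worked example of the ordering above: with realtime deadlines {5, 8, 12}
 * already queued, inserting a thread with deadline 7 places it between 5 and
 * 8 and leaves `preempt` FALSE; inserting deadline 3 places it at the head
 * and returns TRUE, since the thread now first in line has an earlier
 * deadline than anything previously queued.
 */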
55e303ae 3095
2d21ac55
A
3096/*
3097 * realtime_setrun:
3098 *
3099 * Dispatch a thread for realtime execution.
3100 *
3101 * Thread must be locked. Associated pset must
3102 * be locked, and is returned unlocked.
3103 */
3104static void
3105realtime_setrun(
3106 processor_t processor,
3107 thread_t thread)
3108{
3109 processor_set_t pset = processor->processor_set;
39236c6e 3110 ast_t preempt;
55e303ae 3111
fe8ab488
A
3112 boolean_t do_signal_idle = FALSE, do_cause_ast = FALSE;
3113
6d2010ae
A
3114 thread->chosen_processor = processor;
3115
fe8ab488
A
3116 /* <rdar://problem/15102234> */
3117 assert(thread->bound_processor == PROCESSOR_NULL);
3118
2d21ac55
A
3119 /*
3120 * Dispatch directly onto idle processor.
3121 */
6d2010ae
A
3122 if ( (thread->bound_processor == processor)
3123 && processor->state == PROCESSOR_IDLE) {
39037602 3124 re_queue_tail(&pset->active_queue, &processor->processor_queue);
55e303ae 3125
2d21ac55 3126 processor->next_thread = thread;
39236c6e
A
3127 processor->current_pri = thread->sched_pri;
3128 processor->current_thmode = thread->sched_mode;
fe8ab488 3129 processor->current_sfi_class = thread->sfi_class;
2d21ac55
A
3130 processor->deadline = thread->realtime.deadline;
3131 processor->state = PROCESSOR_DISPATCHING;
55e303ae 3132
39236c6e 3133 if (processor != current_processor()) {
3e170ce0 3134 if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
39236c6e 3135 /* cleared on exit from main processor_idle() loop */
3e170ce0 3136 pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
fe8ab488 3137 do_signal_idle = TRUE;
39236c6e
A
3138 }
3139 }
39236c6e 3140 pset_unlock(pset);
fe8ab488
A
3141
3142 if (do_signal_idle) {
3143 machine_signal_idle(processor);
3144 }
2d21ac55
A
3145 return;
3146 }
55e303ae 3147
39236c6e
A
3148 if (processor->current_pri < BASEPRI_RTQUEUES)
3149 preempt = (AST_PREEMPT | AST_URGENT);
3150 else if (thread->realtime.deadline < processor->deadline)
3151 preempt = (AST_PREEMPT | AST_URGENT);
3152 else
3153 preempt = AST_NONE;
3154
3155 realtime_queue_insert(thread);
3156
3157 if (preempt != AST_NONE) {
3158 if (processor->state == PROCESSOR_IDLE) {
39037602
A
3159 re_queue_tail(&pset->active_queue, &processor->processor_queue);
3160
39236c6e
A
3161 processor->next_thread = THREAD_NULL;
3162 processor->current_pri = thread->sched_pri;
3163 processor->current_thmode = thread->sched_mode;
fe8ab488 3164 processor->current_sfi_class = thread->sfi_class;
39236c6e
A
3165 processor->deadline = thread->realtime.deadline;
3166 processor->state = PROCESSOR_DISPATCHING;
3167 if (processor == current_processor()) {
3168 ast_on(preempt);
3169 } else {
3e170ce0 3170 if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
39236c6e 3171 /* cleared on exit from main processor_idle() loop */
3e170ce0 3172 pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
fe8ab488 3173 do_signal_idle = TRUE;
39236c6e 3174 }
fe8ab488 3175 }
39236c6e
A
3176 } else if (processor->state == PROCESSOR_DISPATCHING) {
3177 if ((processor->next_thread == THREAD_NULL) && ((processor->current_pri < thread->sched_pri) || (processor->deadline > thread->realtime.deadline))) {
3178 processor->current_pri = thread->sched_pri;
3179 processor->current_thmode = thread->sched_mode;
fe8ab488 3180 processor->current_sfi_class = thread->sfi_class;
39236c6e
A
3181 processor->deadline = thread->realtime.deadline;
3182 }
3183 } else {
3184 if (processor == current_processor()) {
3185 ast_on(preempt);
3186 } else {
3e170ce0 3187 if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
39236c6e 3188 /* cleared after IPI causes csw_check() to be called */
3e170ce0 3189 pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
fe8ab488 3190 do_cause_ast = TRUE;
39236c6e
A
3191 }
3192 }
3193 }
3194 } else {
3195 /* Selected processor was too busy; just keep the thread enqueued and let other processors drain it naturally. */
2d21ac55
A
3196 }
3197
3198 pset_unlock(pset);
fe8ab488
A
3199
3200 if (do_signal_idle) {
3201 machine_signal_idle(processor);
3202 } else if (do_cause_ast) {
3203 cause_ast_check(processor);
3204 }
2d21ac55
A
3205}
3206
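/*
 * In short: an IDLE target processor is moved to DISPATCHING and woken via
 * machine_signal_idle(); a target already DISPATCHING only has its
 * current_pri/deadline hints refreshed; a RUNNING target is interrupted with
 * cause_ast_check() so that csw_check() runs there and the newly queued
 * earlier-deadline thread can preempt.  pending_AST_cpu_mask prevents
 * sending the same wakeup or IPI twice before the target has acknowledged
 * the first.
 */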
6d2010ae 3207
fe8ab488
A
3208#if defined(CONFIG_SCHED_TIMESHARE_CORE)
3209
3210boolean_t
6d2010ae
A
3211priority_is_urgent(int priority)
3212{
39037602 3213 return bitmap_test(sched_preempt_pri, priority) ? TRUE : FALSE;
6d2010ae
A
3214}
3215
fe8ab488
A
3216#endif /* CONFIG_SCHED_TIMESHARE_CORE */
3217
55e303ae 3218/*
2d21ac55 3219 * processor_setrun:
55e303ae 3220 *
2d21ac55
A
3221 * Dispatch a thread for execution on a
3222 * processor.
55e303ae 3223 *
2d21ac55
A
3224 * Thread must be locked. Associated pset must
3225 * be locked, and is returned unlocked.
55e303ae 3226 */
2d21ac55
A
3227static void
3228processor_setrun(
3229 processor_t processor,
3230 thread_t thread,
3231 integer_t options)
55e303ae 3232{
2d21ac55
A
3233 processor_set_t pset = processor->processor_set;
3234 ast_t preempt;
39236c6e 3235 enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing;
3e170ce0 3236 enum { eNoSignal, eDoSignal, eDoDeferredSignal } do_signal_idle = eNoSignal;
55e303ae 3237
3e170ce0 3238 boolean_t do_cause_ast = FALSE;
fe8ab488 3239
6d2010ae
A
3240 thread->chosen_processor = processor;
3241
55e303ae 3242 /*
2d21ac55 3243 * Dispatch directly onto idle processor.
55e303ae 3244 */
6d2010ae
A
3245 if ( (SCHED(direct_dispatch_to_idle_processors) ||
3246 thread->bound_processor == processor)
3247 && processor->state == PROCESSOR_IDLE) {
39037602
A
3248
3249 re_queue_tail(&pset->active_queue, &processor->processor_queue);
2d21ac55
A
3250
3251 processor->next_thread = thread;
39236c6e
A
3252 processor->current_pri = thread->sched_pri;
3253 processor->current_thmode = thread->sched_mode;
fe8ab488 3254 processor->current_sfi_class = thread->sfi_class;
2d21ac55
A
3255 processor->deadline = UINT64_MAX;
3256 processor->state = PROCESSOR_DISPATCHING;
2d21ac55 3257
3e170ce0 3258 if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
39236c6e 3259 /* cleared on exit from main processor_idle() loop */
3e170ce0
A
3260 pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
3261 do_signal_idle = eDoSignal;
39236c6e
A
3262 }
3263
3264 pset_unlock(pset);
3e170ce0
A
3265
3266 if (do_signal_idle == eDoSignal) {
fe8ab488
A
3267 machine_signal_idle(processor);
3268 }
3269
2d21ac55
A
3270 return;
3271 }
55e303ae
A
3272
3273 /*
2d21ac55 3274 * Set preemption mode.
1c79356b 3275 */
3e170ce0
A
3276#if defined(CONFIG_SCHED_DEFERRED_AST)
3277 /* TODO: Do we need to care about urgency (see rdar://problem/20136239)? */
3278#endif
6d2010ae
A
3279 if (SCHED(priority_is_urgent)(thread->sched_pri) && thread->sched_pri > processor->current_pri)
3280 preempt = (AST_PREEMPT | AST_URGENT);
3281 else if(processor->active_thread && thread_eager_preemption(processor->active_thread))
55e303ae 3282 preempt = (AST_PREEMPT | AST_URGENT);
3e170ce0
A
3283 else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->base_pri)) {
 3284		if (SCHED(priority_is_urgent)(thread->base_pri) && thread->sched_pri > processor->current_pri) {
39236c6e
A
3285 preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
3286 } else {
3287 preempt = AST_NONE;
3288 }
3289 } else
2d21ac55 3290 preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
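	/*
	 * Summary of the decision above: preemption is urgent when the incoming
	 * thread sits in the urgent priority band and beats the processor's
	 * current priority, or when the running thread has requested eager
	 * preemption. A timeshare thread running below its base priority
	 * normally does not force preemption (unless its base priority is
	 * urgent and beats the processor's current priority); otherwise a
	 * non-urgent preemption is requested only if the caller passed
	 * SCHED_PREEMPT.
	 */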
9bccf70c 3291
39236c6e 3292 SCHED(processor_enqueue)(processor, thread, options);
9bccf70c 3293
2d21ac55 3294 if (preempt != AST_NONE) {
39236c6e 3295 if (processor->state == PROCESSOR_IDLE) {
39037602
A
3296 re_queue_tail(&pset->active_queue, &processor->processor_queue);
3297
39236c6e
A
3298 processor->next_thread = THREAD_NULL;
3299 processor->current_pri = thread->sched_pri;
3300 processor->current_thmode = thread->sched_mode;
fe8ab488 3301 processor->current_sfi_class = thread->sfi_class;
39236c6e
A
3302 processor->deadline = UINT64_MAX;
3303 processor->state = PROCESSOR_DISPATCHING;
3304
3305 ipi_action = eExitIdle;
3306 } else if ( processor->state == PROCESSOR_DISPATCHING) {
3307 if ((processor->next_thread == THREAD_NULL) && (processor->current_pri < thread->sched_pri)) {
3308 processor->current_pri = thread->sched_pri;
3309 processor->current_thmode = thread->sched_mode;
fe8ab488 3310 processor->current_sfi_class = thread->sfi_class;
39236c6e
A
3311 processor->deadline = UINT64_MAX;
3312 }
3313 } else if ( (processor->state == PROCESSOR_RUNNING ||
2d21ac55 3314 processor->state == PROCESSOR_SHUTDOWN) &&
3e170ce0 3315 (thread->sched_pri >= processor->current_pri)) {
39236c6e 3316 ipi_action = eInterruptRunning;
2d21ac55 3317 }
39236c6e
A
3318 } else {
3319 /*
3320 * New thread is not important enough to preempt what is running, but
3321 * special processor states may need special handling
3322 */
3323 if (processor->state == PROCESSOR_SHUTDOWN &&
2d21ac55 3324 thread->sched_pri >= processor->current_pri ) {
39236c6e 3325 ipi_action = eInterruptRunning;
d190cdc3 3326 } else if (processor->state == PROCESSOR_IDLE) {
39037602
A
3327 re_queue_tail(&pset->active_queue, &processor->processor_queue);
3328
39236c6e
A
3329 processor->next_thread = THREAD_NULL;
3330 processor->current_pri = thread->sched_pri;
3331 processor->current_thmode = thread->sched_mode;
fe8ab488 3332 processor->current_sfi_class = thread->sfi_class;
39236c6e
A
3333 processor->deadline = UINT64_MAX;
3334 processor->state = PROCESSOR_DISPATCHING;
3335
3336 ipi_action = eExitIdle;
3337 }
2d21ac55 3338 }
39236c6e
A
3339
3340 switch (ipi_action) {
3341 case eDoNothing:
3342 break;
3343 case eExitIdle:
3344 if (processor == current_processor()) {
fe8ab488 3345 if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE)
39236c6e
A
3346 ast_on(preempt);
3347 } else {
3e170ce0
A
3348#if defined(CONFIG_SCHED_DEFERRED_AST)
3349 if (!(pset->pending_deferred_AST_cpu_mask & (1ULL << processor->cpu_id)) &&
3350 !(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
39236c6e 3351 /* cleared on exit from main processor_idle() loop */
3e170ce0
A
3352 pset->pending_deferred_AST_cpu_mask |= (1ULL << processor->cpu_id);
3353 do_signal_idle = eDoDeferredSignal;
3354 }
3355#else
3356 if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
3357 /* cleared on exit from main processor_idle() loop */
3358 pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
3359 do_signal_idle = eDoSignal;
39236c6e 3360 }
3e170ce0 3361#endif
39236c6e
A
3362 }
3363 break;
3364 case eInterruptRunning:
3365 if (processor == current_processor()) {
fe8ab488 3366 if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE)
39236c6e
A
3367 ast_on(preempt);
3368 } else {
3e170ce0 3369 if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
39236c6e 3370 /* cleared after IPI causes csw_check() to be called */
3e170ce0 3371 pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
fe8ab488 3372 do_cause_ast = TRUE;
39236c6e
A
3373 }
3374 }
3375 break;
6d2010ae 3376 }
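	/*
	 * The IPI/AST bookkeeping above is done with the pset lock held:
	 * setting a cpu's bit in pending_AST_cpu_mask (or the deferred mask)
	 * records that a signal is already in flight, suppressing duplicate
	 * IPIs. The actual machine_signal_idle()/cause_ast_check() calls are
	 * issued only after pset_unlock() below.
	 */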
2d21ac55
A
3377
3378 pset_unlock(pset);
fe8ab488 3379
3e170ce0 3380 if (do_signal_idle == eDoSignal) {
fe8ab488 3381 machine_signal_idle(processor);
fe8ab488 3382 }
3e170ce0
A
3383#if defined(CONFIG_SCHED_DEFERRED_AST)
3384 else if (do_signal_idle == eDoDeferredSignal) {
3385 /*
3386 * TODO: The ability to cancel this signal could make
3387 * sending it outside of the pset lock an issue. Do
3388 * we need to address this? Or would the only fallout
3389 * be that the core takes a signal? As long as we do
3390 * not run the risk of having a core marked as signal
3391 * outstanding, with no real signal outstanding, the
3392 * only result should be that we fail to cancel some
3393 * signals.
3394 */
3395 machine_signal_idle_deferred(processor);
316670eb 3396 }
3e170ce0
A
3397#endif
3398 else if (do_cause_ast) {
3399 cause_ast_check(processor);
6d2010ae 3400 }
6d2010ae
A
3401}
3402
2d21ac55
A
3403/*
3404 * choose_next_pset:
3405 *
3406 * Return the next sibling pset containing
3407 * available processors.
3408 *
3409 * Returns the original pset if none other is
3410 * suitable.
3411 */
3412static processor_set_t
3413choose_next_pset(
3414 processor_set_t pset)
3415{
3416 processor_set_t nset = pset;
3417
3418 do {
3419 nset = next_pset(nset);
6d2010ae 3420 } while (nset->online_processor_count < 1 && nset != pset);
2d21ac55 3421
cf7d32b8 3422 return (nset);
2d21ac55
A
3423}
3424
3425/*
3426 * choose_processor:
3427 *
3428 * Choose a processor for the thread, beginning at
b7266188 3429 * the pset. Accepts an optional processor hint in
2d21ac55
A
3430 * the pset.
3431 *
3432 * Returns a processor, possibly from a different pset.
3433 *
3434 * The thread must be locked. The pset must be locked,
3435 * and the resulting pset is locked on return.
3436 */
6d2010ae 3437processor_t
2d21ac55
A
3438choose_processor(
3439 processor_set_t pset,
b7266188 3440 processor_t processor,
2d21ac55
A
3441 thread_t thread)
3442{
3443 processor_set_t nset, cset = pset;
39037602
A
3444
3445 assert(thread->sched_pri <= BASEPRI_RTQUEUES);
3446
cf7d32b8 3447 /*
fe8ab488 3448 * Prefer the hinted processor, when appropriate.
cf7d32b8 3449 */
b7266188 3450
fe8ab488 3451 /* Fold last processor hint from secondary processor to its primary */
0b4c1975 3452 if (processor != PROCESSOR_NULL) {
fe8ab488 3453 processor = processor->processor_primary;
0b4c1975 3454 }
b0d623f7 3455
fe8ab488
A
3456 /*
3457 * Only consult platform layer if pset is active, which
3458 * it may not be in some cases when a multi-set system
3459 * is going to sleep.
3460 */
3461 if (pset->online_processor_count) {
3462 if ((processor == PROCESSOR_NULL) || (processor->processor_set == pset && processor->state == PROCESSOR_IDLE)) {
3463 processor_t mc_processor = machine_choose_processor(pset, processor);
3464 if (mc_processor != PROCESSOR_NULL)
3465 processor = mc_processor->processor_primary;
3466 }
3467 }
b7266188 3468
fe8ab488
A
3469 /*
3470 * At this point, we may have a processor hint, and we may have
3471 * an initial starting pset. If the hint is not in the pset, or
3472 * if the hint is for a processor in an invalid state, discard
3473 * the hint.
3474 */
0b4c1975 3475 if (processor != PROCESSOR_NULL) {
fe8ab488 3476 if (processor->processor_set != pset) {
cf7d32b8 3477 processor = PROCESSOR_NULL;
3e170ce0
A
3478 } else if (!processor->is_recommended) {
3479 processor = PROCESSOR_NULL;
fe8ab488
A
3480 } else {
3481 switch (processor->state) {
3482 case PROCESSOR_START:
3483 case PROCESSOR_SHUTDOWN:
3484 case PROCESSOR_OFF_LINE:
3485 /*
3486 * Hint is for a processor that cannot support running new threads.
3487 */
3488 processor = PROCESSOR_NULL;
3489 break;
3490 case PROCESSOR_IDLE:
3491 /*
3492 * Hint is for an idle processor. Assume it is no worse than any other
3493 * idle processor. The platform layer had an opportunity to provide
3494 * the "least cost idle" processor above.
3495 */
3496 return (processor);
fe8ab488
A
3497 case PROCESSOR_RUNNING:
3498 case PROCESSOR_DISPATCHING:
3499 /*
3500 * Hint is for an active CPU. This fast-path allows
3501 * realtime threads to preempt non-realtime threads
3502 * to regain their previous executing processor.
3503 */
3504 if ((thread->sched_pri >= BASEPRI_RTQUEUES) &&
3505 (processor->current_pri < BASEPRI_RTQUEUES))
3506 return (processor);
3507
3508 /* Otherwise, use hint as part of search below */
3509 break;
3510 default:
3511 processor = PROCESSOR_NULL;
3512 break;
3513 }
3514 }
b7266188 3515 }
2d21ac55
A
3516
3517 /*
fe8ab488
A
3518 * Iterate through the processor sets to locate
3519 * an appropriate processor. Seed results with
3520 * a last-processor hint, if available, so that
3521 * a search must find something strictly better
3522 * to replace it.
3523 *
 3524	 * A primary/secondary pair of SMT processors is
3525 * "unpaired" if the primary is busy but its
3526 * corresponding secondary is idle (so the physical
3527 * core has full use of its resources).
2d21ac55 3528 */
fe8ab488
A
3529
3530 integer_t lowest_priority = MAXPRI + 1;
3531 integer_t lowest_unpaired_primary_priority = MAXPRI + 1;
3532 integer_t lowest_count = INT_MAX;
3533 uint64_t furthest_deadline = 1;
3534 processor_t lp_processor = PROCESSOR_NULL;
3535 processor_t lp_unpaired_primary_processor = PROCESSOR_NULL;
3536 processor_t lp_unpaired_secondary_processor = PROCESSOR_NULL;
3537 processor_t lc_processor = PROCESSOR_NULL;
3538 processor_t fd_processor = PROCESSOR_NULL;
3539
3540 if (processor != PROCESSOR_NULL) {
3541 /* All other states should be enumerated above. */
3542 assert(processor->state == PROCESSOR_RUNNING || processor->state == PROCESSOR_DISPATCHING);
3543
3544 lowest_priority = processor->current_pri;
3545 lp_processor = processor;
3546
3547 if (processor->current_pri >= BASEPRI_RTQUEUES) {
3548 furthest_deadline = processor->deadline;
3549 fd_processor = processor;
3550 }
3551
3552 lowest_count = SCHED(processor_runq_count)(processor);
3553 lc_processor = processor;
3554 }
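	/*
	 * The traversal below tracks several candidates at once: lp_processor
	 * (lowest current priority), fd_processor (realtime processor with the
	 * furthest deadline), lc_processor (shortest run queue), and the
	 * lp_unpaired_primary/secondary pair (a busy primary whose SMT
	 * secondary is idle). Seeding from the hint above means a candidate
	 * must be strictly better than the hint to replace it.
	 */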
3555
2d21ac55 3556 do {
fe8ab488 3557
9bccf70c 3558 /*
fe8ab488 3559 * Choose an idle processor, in pset traversal order
9bccf70c 3560 */
3e170ce0
A
3561 qe_foreach_element(processor, &cset->idle_queue, processor_queue) {
3562 if (processor->is_recommended)
3563 return processor;
3564 }
1c79356b 3565
fe8ab488
A
3566 /*
3567 * Otherwise, enumerate active and idle processors to find candidates
3568 * with lower priority/etc.
3569 */
0b4c1975 3570
3e170ce0
A
3571 qe_foreach_element(processor, &cset->active_queue, processor_queue) {
3572
3573 if (!processor->is_recommended) {
3574 continue;
3575 }
2d21ac55 3576
fe8ab488
A
3577 integer_t cpri = processor->current_pri;
3578 if (cpri < lowest_priority) {
3579 lowest_priority = cpri;
3580 lp_processor = processor;
3581 }
b0d623f7 3582
fe8ab488
A
3583 if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) {
3584 furthest_deadline = processor->deadline;
3585 fd_processor = processor;
3586 }
0b4c1975 3587
fe8ab488
A
3588 integer_t ccount = SCHED(processor_runq_count)(processor);
3589 if (ccount < lowest_count) {
3590 lowest_count = ccount;
3591 lc_processor = processor;
3592 }
fe8ab488
A
3593 }
3594
3595 /*
 3596	 * For SMT configs, these idle secondary processors must have an active primary. Otherwise
 3597	 * the idle primary would have short-circuited the loop above.
3598 */
3e170ce0
A
3599 qe_foreach_element(processor, &cset->idle_secondary_queue, processor_queue) {
3600
3601 if (!processor->is_recommended) {
3602 continue;
3603 }
3604
fe8ab488
A
3605 processor_t cprimary = processor->processor_primary;
3606
3607 /* If the primary processor is offline or starting up, it's not a candidate for this path */
3608 if (cprimary->state == PROCESSOR_RUNNING || cprimary->state == PROCESSOR_DISPATCHING) {
3609 integer_t primary_pri = cprimary->current_pri;
3610
3611 if (primary_pri < lowest_unpaired_primary_priority) {
3612 lowest_unpaired_primary_priority = primary_pri;
3613 lp_unpaired_primary_processor = cprimary;
3614 lp_unpaired_secondary_processor = processor;
0b4c1975 3615 }
2d21ac55 3616 }
fe8ab488
A
3617 }
3618
0b4c1975 3619
fe8ab488
A
3620 if (thread->sched_pri >= BASEPRI_RTQUEUES) {
3621
3622 /*
3623 * For realtime threads, the most important aspect is
3624 * scheduling latency, so we attempt to assign threads
3625 * to good preemption candidates (assuming an idle primary
3626 * processor was not available above).
3627 */
3628
3629 if (thread->sched_pri > lowest_unpaired_primary_priority) {
3630 /* Move to end of active queue so that the next thread doesn't also pick it */
39037602 3631 re_queue_tail(&cset->active_queue, &lp_unpaired_primary_processor->processor_queue);
fe8ab488
A
3632 return lp_unpaired_primary_processor;
3633 }
3634 if (thread->sched_pri > lowest_priority) {
3635 /* Move to end of active queue so that the next thread doesn't also pick it */
39037602 3636 re_queue_tail(&cset->active_queue, &lp_processor->processor_queue);
0b4c1975 3637 return lp_processor;
fe8ab488 3638 }
0b4c1975
A
3639 if (thread->realtime.deadline < furthest_deadline)
3640 return fd_processor;
6d2010ae 3641
2d21ac55 3642 /*
fe8ab488
A
3643 * If all primary and secondary CPUs are busy with realtime
 3644	 * threads with deadlines earlier than ours, move on to the next
3645 * pset.
2d21ac55 3646 */
fe8ab488
A
3647 }
3648 else {
3649
3650 if (thread->sched_pri > lowest_unpaired_primary_priority) {
3651 /* Move to end of active queue so that the next thread doesn't also pick it */
39037602 3652 re_queue_tail(&cset->active_queue, &lp_unpaired_primary_processor->processor_queue);
fe8ab488 3653 return lp_unpaired_primary_processor;
c910b4d9 3654 }
fe8ab488
A
3655 if (thread->sched_pri > lowest_priority) {
3656 /* Move to end of active queue so that the next thread doesn't also pick it */
39037602 3657 re_queue_tail(&cset->active_queue, &lp_processor->processor_queue);
fe8ab488 3658 return lp_processor;
cf7d32b8 3659 }
9bccf70c 3660
9bccf70c 3661 /*
fe8ab488
A
 3662	 * If all primary processors in this pset are running a higher
 3663	 * priority thread, move on to the next pset. Only when we have
3664 * exhausted this search do we fall back to other heuristics.
1c79356b 3665 */
2d21ac55
A
3666 }
3667
3668 /*
fe8ab488 3669 * Move onto the next processor set.
2d21ac55
A
3670 */
3671 nset = next_pset(cset);
3672
3673 if (nset != pset) {
3674 pset_unlock(cset);
3675
3676 cset = nset;
3677 pset_lock(cset);
3678 }
3679 } while (nset != pset);
3680
3681 /*
fe8ab488
A
3682 * Make sure that we pick a running processor,
3683 * and that the correct processor set is locked.
 3684	 * Since we may have unlocked the candidate processor's
3685 * pset, it may have changed state.
3686 *
3687 * All primary processors are running a higher priority
3688 * thread, so the only options left are enqueuing on
 3689	 * the secondary processor that would perturb the lowest-priority
3690 * primary, or the least busy primary.
2d21ac55 3691 */
cf7d32b8 3692 do {
2d21ac55 3693
fe8ab488
A
3694 /* lowest_priority is evaluated in the main loops above */
3695 if (lp_unpaired_secondary_processor != PROCESSOR_NULL) {
3696 processor = lp_unpaired_secondary_processor;
3697 lp_unpaired_secondary_processor = PROCESSOR_NULL;
3698 } else if (lc_processor != PROCESSOR_NULL) {
3699 processor = lc_processor;
3700 lc_processor = PROCESSOR_NULL;
3701 } else {
cf7d32b8 3702 /*
fe8ab488
A
3703 * All processors are executing higher
3704 * priority threads, and the lowest_count
3705 * candidate was not usable
cf7d32b8 3706 */
fe8ab488 3707 processor = master_processor;
cf7d32b8
A
3708 }
3709
3710 /*
fe8ab488
A
3711 * Check that the correct processor set is
3712 * returned locked.
cf7d32b8
A
3713 */
3714 if (cset != processor->processor_set) {
3715 pset_unlock(cset);
cf7d32b8
A
3716 cset = processor->processor_set;
3717 pset_lock(cset);
3718 }
3719
3720 /*
fe8ab488
A
3721 * We must verify that the chosen processor is still available.
3722 * master_processor is an exception, since we may need to preempt
3723 * a running thread on it during processor shutdown (for sleep),
3724 * and that thread needs to be enqueued on its runqueue to run
3725 * when the processor is restarted.
cf7d32b8 3726 */
fe8ab488 3727 if (processor != master_processor && (processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE))
cf7d32b8 3728 processor = PROCESSOR_NULL;
fe8ab488 3729
cf7d32b8 3730 } while (processor == PROCESSOR_NULL);
2d21ac55
A
3731
3732 return (processor);
3733}
3734
3735/*
3736 * thread_setrun:
3737 *
3738 * Dispatch thread for execution, onto an idle
3739 * processor or run queue, and signal a preemption
3740 * as appropriate.
3741 *
3742 * Thread must be locked.
3743 */
3744void
3745thread_setrun(
3746 thread_t thread,
3747 integer_t options)
3748{
3749 processor_t processor;
3750 processor_set_t pset;
3751
3e170ce0
A
3752 assert((thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_TERMINATE|TH_TERMINATE2)) == TH_RUN);
3753 assert(thread->runq == PROCESSOR_NULL);
3754
2d21ac55
A
3755 /*
3756 * Update priority if needed.
3757 */
6d2010ae
A
3758 if (SCHED(can_update_priority)(thread))
3759 SCHED(update_priority)(thread);
2d21ac55 3760
fe8ab488
A
3761 thread->sfi_class = sfi_thread_classify(thread);
3762
2d21ac55
A
3763 assert(thread->runq == PROCESSOR_NULL);
3764
3e170ce0 3765#if __SMP__
2d21ac55
A
3766 if (thread->bound_processor == PROCESSOR_NULL) {
3767 /*
3768 * Unbound case.
3769 */
3770 if (thread->affinity_set != AFFINITY_SET_NULL) {
3771 /*
3772 * Use affinity set policy hint.
3773 */
3774 pset = thread->affinity_set->aset_pset;
3775 pset_lock(pset);
3776
6d2010ae 3777 processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
39236c6e 3778
3e170ce0 3779 SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
39236c6e 3780 (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
3e170ce0 3781 } else if (thread->last_processor != PROCESSOR_NULL) {
2d21ac55
A
3782 /*
3783 * Simple (last processor) affinity case.
3784 */
3785 processor = thread->last_processor;
3786 pset = processor->processor_set;
3787 pset_lock(pset);
6d2010ae
A
3788 processor = SCHED(choose_processor)(pset, processor, thread);
3789
3e170ce0 3790 SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
39236c6e 3791 (uintptr_t)thread_tid(thread), thread->last_processor->cpu_id, processor->cpu_id, processor->state, 0);
3e170ce0 3792 } else {
2d21ac55
A
3793 /*
3794 * No Affinity case:
3795 *
cf7d32b8
A
 3796			 * Utilize a per-task hint to spread threads
3797 * among the available processor sets.
2d21ac55 3798 */
cf7d32b8
A
3799 task_t task = thread->task;
3800
3801 pset = task->pset_hint;
3802 if (pset == PROCESSOR_SET_NULL)
3803 pset = current_processor()->processor_set;
3804
3805 pset = choose_next_pset(pset);
2d21ac55 3806 pset_lock(pset);
9bccf70c 3807
6d2010ae 3808 processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
cf7d32b8 3809 task->pset_hint = processor->processor_set;
39236c6e 3810
3e170ce0 3811 SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
39236c6e 3812 (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
55e303ae 3813 }
3e170ce0 3814 } else {
2d21ac55
A
3815 /*
3816 * Bound case:
3817 *
3818 * Unconditionally dispatch on the processor.
3819 */
3820 processor = thread->bound_processor;
55e303ae 3821 pset = processor->processor_set;
2d21ac55 3822 pset_lock(pset);
39236c6e 3823
3e170ce0 3824 SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
39236c6e 3825 (uintptr_t)thread_tid(thread), (uintptr_t)-2, processor->cpu_id, processor->state, 0);
2d21ac55 3826 }
3e170ce0
A
3827#else /* !__SMP__ */
3828 /* Only one processor to choose */
3829 assert(thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == master_processor);
3830 processor = master_processor;
3831 pset = processor->processor_set;
3832 pset_lock(pset);
3833#endif /* !__SMP__ */
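	/*
	 * At this point a target processor and its pset have been chosen and
	 * the pset is locked; realtime_setrun()/processor_setrun() below
	 * enqueue or directly dispatch the thread and drop the pset lock
	 * before returning.
	 */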
2d21ac55
A
3834
3835 /*
3e170ce0 3836 * Dispatch the thread on the chosen processor.
fe8ab488 3837 * TODO: This should be based on sched_mode, not sched_pri
2d21ac55
A
3838 */
3839 if (thread->sched_pri >= BASEPRI_RTQUEUES)
3840 realtime_setrun(processor, thread);
3841 else
3842 processor_setrun(processor, thread, options);
3843}
3844
b0d623f7
A
3845processor_set_t
3846task_choose_pset(
3847 task_t task)
3848{
3849 processor_set_t pset = task->pset_hint;
3850
3851 if (pset != PROCESSOR_SET_NULL)
3852 pset = choose_next_pset(pset);
3853
3854 return (pset);
3855}
3856
9bccf70c 3857/*
c910b4d9
A
3858 * Check for a preemption point in
3859 * the current context.
55e303ae 3860 *
fe8ab488 3861 * Called at splsched with thread locked.
9bccf70c
A
3862 */
3863ast_t
3864csw_check(
fe8ab488
A
3865 processor_t processor,
3866 ast_t check_reason)
39236c6e
A
3867{
3868 processor_set_t pset = processor->processor_set;
3869 ast_t result;
3870
3871 pset_lock(pset);
3872
3873 /* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */
3e170ce0 3874 pset->pending_AST_cpu_mask &= ~(1ULL << processor->cpu_id);
39236c6e 3875
fe8ab488 3876 result = csw_check_locked(processor, pset, check_reason);
39236c6e
A
3877
3878 pset_unlock(pset);
3879
3880 return result;
3881}
3882
3883/*
3884 * Check for preemption at splsched with
fe8ab488 3885 * pset and thread locked
39236c6e
A
3886 */
3887ast_t
3888csw_check_locked(
3889 processor_t processor,
fe8ab488
A
3890 processor_set_t pset __unused,
3891 ast_t check_reason)
9bccf70c 3892{
fe8ab488 3893 ast_t result;
316670eb 3894 thread_t thread = processor->active_thread;
9bccf70c 3895
3e170ce0 3896 if (processor->first_timeslice) {
6d2010ae 3897 if (rt_runq.count > 0)
fe8ab488 3898 return (check_reason | AST_PREEMPT | AST_URGENT);
9bccf70c
A
3899 }
3900 else {
39236c6e
A
3901 if (rt_runq.count > 0) {
3902 if (BASEPRI_RTQUEUES > processor->current_pri)
fe8ab488 3903 return (check_reason | AST_PREEMPT | AST_URGENT);
39236c6e 3904 else
fe8ab488 3905 return (check_reason | AST_PREEMPT);
39236c6e 3906 }
1c79356b 3907 }
9bccf70c 3908
316670eb 3909 result = SCHED(processor_csw_check)(processor);
9bccf70c 3910 if (result != AST_NONE)
3e170ce0
A
3911 return (check_reason | result | (thread_eager_preemption(thread) ? AST_URGENT : AST_NONE));
3912
3913#if __SMP__
9bccf70c 3914
3e170ce0
A
3915 /*
3916 * If the current thread is running on a processor that is no longer recommended, gently
 3917	 * (non-urgently) get to a point and then block, at which point thread_select() should
3918 * try to idle the processor and re-dispatch the thread to a recommended processor.
3919 */
3920 if (!processor->is_recommended)
fe8ab488 3921 return (check_reason | AST_PREEMPT);
3e170ce0
A
3922
3923 /*
3924 * Even though we could continue executing on this processor, a
3925 * secondary SMT core should try to shed load to another primary core.
3926 *
3927 * TODO: Should this do the same check that thread_select does? i.e.
 3928	 * if no bound threads target this processor, and idle primaries exist, preempt.
 3929	 * The case of RT threads existing is already taken care of above.
3930 * Consider Capri in this scenario.
3931 *
3932 * if (!SCHED(processor_bound_count)(processor) && !queue_empty(&pset->idle_queue))
3933 *
3934 * TODO: Alternatively - check if only primary is idle, or check if primary's pri is lower than mine.
3935 */
3936
3937 if (processor->current_pri < BASEPRI_RTQUEUES &&
3938 processor->processor_primary != processor)
3939 return (check_reason | AST_PREEMPT);
3940#endif
3941
316670eb 3942 if (thread->state & TH_SUSP)
fe8ab488
A
3943 return (check_reason | AST_PREEMPT);
3944
3e170ce0 3945#if CONFIG_SCHED_SFI
fe8ab488
A
3946 /*
 3947	 * Current thread may not need to be preempted, but may need
3948 * an SFI wait?
3949 */
3950 result = sfi_thread_needs_ast(thread, NULL);
3951 if (result != AST_NONE)
3952 return (check_reason | result);
3e170ce0 3953#endif
c910b4d9
A
3954
3955 return (AST_NONE);
1c79356b
A
3956}
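/*
 * Order of checks in csw_check_locked() above: pending realtime work on
 * rt_runq first, then the scheduler-specific run-queue check (made urgent
 * if the current thread has eager preemption set), then de-recommended and
 * secondary-SMT processors, then a suspended (TH_SUSP) current thread, and
 * finally a possible SFI wait.
 */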
3957
3958/*
9bccf70c 3959 * set_sched_pri:
1c79356b 3960 *
55e303ae
A
3961 * Set the scheduled priority of the specified thread.
3962 *
9bccf70c 3963 * This may cause the thread to change queues.
1c79356b 3964 *
55e303ae 3965 * Thread must be locked.
1c79356b
A
3966 */
3967void
9bccf70c 3968set_sched_pri(
3e170ce0
A
3969 thread_t thread,
3970 int priority)
1c79356b 3971{
3e170ce0
A
3972 thread_t cthread = current_thread();
3973 boolean_t is_current_thread = (thread == cthread) ? TRUE : FALSE;
fe8ab488
A
3974 int curgency, nurgency;
3975 uint64_t urgency_param1, urgency_param2;
3e170ce0 3976 boolean_t removed_from_runq = FALSE;
9bccf70c 3977
3e170ce0
A
3978 /* If we're already at this priority, no need to mess with the runqueue */
3979 if (priority == thread->sched_pri)
3980 return;
3981
3982 if (is_current_thread) {
3983 assert(thread->runq == PROCESSOR_NULL);
fe8ab488 3984 curgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2);
3e170ce0
A
3985 } else {
3986 removed_from_runq = thread_run_queue_remove(thread);
fe8ab488 3987 }
3e170ce0 3988
490019cf
A
3989 thread->sched_pri = priority;
3990
3e170ce0
A
3991 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
3992 (uintptr_t)thread_tid(thread),
3993 thread->base_pri,
3994 thread->sched_pri,
3995 0, /* eventually, 'reason' */
3996 0);
3997
3e170ce0 3998 if (is_current_thread) {
fe8ab488 3999 nurgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2);
3e170ce0
A
4000 /*
4001 * set_sched_pri doesn't alter RT params. We expect direct base priority/QoS
4002 * class alterations from user space to occur relatively infrequently, hence
4003 * those are lazily handled. QoS classes have distinct priority bands, and QoS
4004 * inheritance is expected to involve priority changes.
4005 */
fe8ab488 4006 if (nurgency != curgency) {
3e170ce0 4007 thread_tell_urgency(nurgency, urgency_param1, urgency_param2, 0, thread);
d190cdc3 4008 machine_thread_going_on_core(thread, nurgency, 0, 0);
fe8ab488
A
4009 }
4010 }
4011
3e170ce0
A
4012 /* TODO: Should this be TAILQ if it went down, HEADQ if it went up? */
4013 if (removed_from_runq)
4014 thread_run_queue_reinsert(thread, SCHED_PREEMPT | SCHED_TAILQ);
4015 else if (thread->state & TH_RUN) {
4016 processor_t processor = thread->last_processor;
9bccf70c 4017
3e170ce0
A
4018 if (is_current_thread) {
4019 ast_t preempt;
9bccf70c 4020
9bccf70c 4021 processor->current_pri = priority;
6d2010ae 4022 processor->current_thmode = thread->sched_mode;
fe8ab488
A
4023 processor->current_sfi_class = thread->sfi_class = sfi_thread_classify(thread);
4024 if ((preempt = csw_check(processor, AST_NONE)) != AST_NONE)
c910b4d9 4025 ast_on(preempt);
3e170ce0 4026 } else if (processor != PROCESSOR_NULL && processor->active_thread == thread)
9bccf70c 4027 cause_ast_check(processor);
1c79356b
A
4028 }
4029}
4030
3e170ce0
A
4031/*
4032 * thread_run_queue_remove_for_handoff
4033 *
4034 * Pull a thread or its (recursive) push target out of the runqueue
4035 * so that it is ready for thread_run()
4036 *
4037 * Called at splsched
4038 *
4039 * Returns the thread that was pulled or THREAD_NULL if no thread could be pulled.
 4041	 * This may be different from the thread that was passed in.
4041 */
4042thread_t
4043thread_run_queue_remove_for_handoff(thread_t thread) {
91447636 4044
3e170ce0 4045 thread_t pulled_thread = THREAD_NULL;
91447636 4046
3e170ce0 4047 thread_lock(thread);
91447636 4048
3e170ce0
A
4049 /*
4050 * Check that the thread is not bound
4051 * to a different processor, and that realtime
4052 * is not involved.
4053 *
4054 * Next, pull it off its run queue. If it
4055 * doesn't come, it's not eligible.
4056 */
91447636 4057
3e170ce0
A
4058 processor_t processor = current_processor();
4059 if (processor->current_pri < BASEPRI_RTQUEUES && thread->sched_pri < BASEPRI_RTQUEUES &&
4060 (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor)) {
91447636 4061
3e170ce0
A
4062 if (thread_run_queue_remove(thread))
4063 pulled_thread = thread;
91447636
A
4064 }
4065
3e170ce0 4066 thread_unlock(thread);
6d2010ae 4067
3e170ce0 4068 return pulled_thread;
6d2010ae
A
4069}
4070
1c79356b 4071/*
6d2010ae 4072 * thread_run_queue_remove:
1c79356b 4073 *
fe8ab488 4074 * Remove a thread from its current run queue and
2d21ac55 4075 * return TRUE if successful.
55e303ae
A
4076 *
4077 * Thread must be locked.
fe8ab488
A
4078 *
4079 * If thread->runq is PROCESSOR_NULL, the thread will not re-enter the
4080 * run queues because the caller locked the thread. Otherwise
4081 * the thread is on a run queue, but could be chosen for dispatch
4082 * and removed by another processor under a different lock, which
4083 * will set thread->runq to PROCESSOR_NULL.
4084 *
4085 * Hence the thread select path must not rely on anything that could
4086 * be changed under the thread lock after calling this function,
4087 * most importantly thread->sched_pri.
1c79356b 4088 */
2d21ac55 4089boolean_t
6d2010ae 4090thread_run_queue_remove(
fe8ab488 4091 thread_t thread)
1c79356b 4092{
fe8ab488
A
4093 boolean_t removed = FALSE;
4094 processor_t processor = thread->runq;
1c79356b 4095
fe8ab488
A
4096 if ((thread->state & (TH_RUN|TH_WAIT)) == TH_WAIT) {
4097 /* Thread isn't runnable */
4098 assert(thread->runq == PROCESSOR_NULL);
4099 return FALSE;
4100 }
55e303ae 4101
fe8ab488 4102 if (processor == PROCESSOR_NULL) {
55e303ae 4103 /*
fe8ab488
A
4104 * The thread is either not on the runq,
4105 * or is in the midst of being removed from the runq.
4106 *
4107 * runq is set to NULL under the pset lock, not the thread
4108 * lock, so the thread may still be in the process of being dequeued
4109 * from the runq. It will wait in invoke for the thread lock to be
4110 * dropped.
55e303ae 4111 */
55e303ae 4112
fe8ab488
A
4113 return FALSE;
4114 }
55e303ae 4115
fe8ab488
A
4116 if (thread->sched_pri < BASEPRI_RTQUEUES) {
4117 return SCHED(processor_queue_remove)(processor, thread);
4118 }
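	/*
	 * Below this point the thread was observed on the realtime run queue;
	 * take the RT queue lock and re-check thread->runq, since another
	 * processor may have dequeued the thread in the meantime.
	 */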
55e303ae 4119
3e170ce0 4120 rt_lock_lock();
55e303ae 4121
fe8ab488
A
4122 if (thread->runq != PROCESSOR_NULL) {
4123 /*
3e170ce0 4124 * Thread is on the RT run queue and we have a lock on
fe8ab488
A
4125 * that run queue.
4126 */
4127
3e170ce0 4128 assert(thread->runq == THREAD_ON_RT_RUNQ);
fe8ab488 4129
39037602 4130 remqueue(&thread->runq_links);
fe8ab488
A
4131 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
4132 rt_runq.count--;
4133
4134 thread->runq = PROCESSOR_NULL;
4135
4136 removed = TRUE;
1c79356b
A
4137 }
4138
3e170ce0 4139 rt_lock_unlock();
fe8ab488
A
4140
4141 return (removed);
1c79356b
A
4142}
4143
cf7d32b8 4144/*
3e170ce0 4145 * Put the thread back where it goes after a thread_run_queue_remove
cf7d32b8 4146 *
3e170ce0 4147 * Thread must have been removed while holding the same thread lock
cf7d32b8 4148 *
3e170ce0 4149 * thread locked, at splsched
cf7d32b8 4150 */
3e170ce0
A
4151void
4152thread_run_queue_reinsert(thread_t thread, integer_t options)
cf7d32b8 4153{
3e170ce0 4154 assert(thread->runq == PROCESSOR_NULL);
cf7d32b8 4155
3e170ce0
A
4156 assert(thread->state & (TH_RUN));
4157 thread_setrun(thread, options);
6d2010ae 4158
6d2010ae
A
4159}
4160
39236c6e
A
4161void
4162sys_override_cpu_throttle(int flag)
6d2010ae 4163{
39236c6e
A
4164 if (flag == CPU_THROTTLE_ENABLE)
4165 cpu_throttle_enabled = 1;
4166 if (flag == CPU_THROTTLE_DISABLE)
4167 cpu_throttle_enabled = 0;
4168}
6d2010ae 4169
39236c6e
A
4170int
4171thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2)
4172{
4173 if (thread == NULL || (thread->state & TH_IDLE)) {
4174 *arg1 = 0;
4175 *arg2 = 0;
4176
4177 return (THREAD_URGENCY_NONE);
4178 } else if (thread->sched_mode == TH_MODE_REALTIME) {
4179 *arg1 = thread->realtime.period;
4180 *arg2 = thread->realtime.deadline;
4181
4182 return (THREAD_URGENCY_REAL_TIME);
4183 } else if (cpu_throttle_enabled &&
3e170ce0 4184 ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->base_pri <= MAXPRI_THROTTLE))) {
39236c6e
A
4185 /*
4186 * Background urgency applied when thread priority is MAXPRI_THROTTLE or lower and thread is not promoted
4187 */
4188 *arg1 = thread->sched_pri;
3e170ce0 4189 *arg2 = thread->base_pri;
6d2010ae 4190
39236c6e
A
4191 return (THREAD_URGENCY_BACKGROUND);
4192 } else {
fe8ab488
A
4193 /* For otherwise unclassified threads, report throughput QoS
4194 * parameters
4195 */
39037602
A
4196 *arg1 = proc_get_effective_thread_policy(thread, TASK_POLICY_THROUGH_QOS);
4197 *arg2 = proc_get_effective_task_policy(thread->task, TASK_POLICY_THROUGH_QOS);
4198
6d2010ae
A
4199 return (THREAD_URGENCY_NORMAL);
4200 }
6d2010ae
A
4201}
4202
4203
1c79356b 4204/*
2d21ac55
A
4205 * This is the processor idle loop, which just looks for other threads
4206 * to execute. Processor idle threads invoke this without supplying a
4207 * current thread to idle without an asserted wait state.
4208 *
 4209 * Returns the next thread to execute if dispatched directly.
1c79356b 4210 */
6d2010ae
A
4211
4212#if 0
4213#define IDLE_KERNEL_DEBUG_CONSTANT(...) KERNEL_DEBUG_CONSTANT(__VA_ARGS__)
4214#else
4215#define IDLE_KERNEL_DEBUG_CONSTANT(...) do { } while(0)
4216#endif
4217
4218thread_t
2d21ac55
A
4219processor_idle(
4220 thread_t thread,
4221 processor_t processor)
1c79356b 4222{
2d21ac55
A
4223 processor_set_t pset = processor->processor_set;
4224 thread_t new_thread;
4225 int state;
2d21ac55 4226 (void)splsched();
1c79356b 4227
316670eb
A
4228 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4229 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START,
4230 (uintptr_t)thread_tid(thread), 0, 0, 0, 0);
3a60a9f5 4231
6d2010ae
A
4232 SCHED_STATS_CPU_IDLE_START(processor);
4233
2d21ac55
A
4234 timer_switch(&PROCESSOR_DATA(processor, system_state),
4235 mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
4236 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);
3a60a9f5 4237
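	/*
	 * The loop below stays in machine_idle() until the processor leaves
	 * PROCESSOR_IDLE, a remote AST is pending for this cpu, realtime work
	 * appears (or bound work, if the processor is not recommended), an
	 * idle-in-place thread wakes up (when configured), or a primary
	 * processor finds its run queue non-empty.
	 */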
39236c6e 4238 while (1) {
39236c6e
A
4239 if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */
4240 break;
3e170ce0 4241 if (pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))
39236c6e 4242 break;
3e170ce0
A
4243 if (processor->is_recommended) {
4244 if (rt_runq.count)
4245 break;
4246 } else {
4247 if (SCHED(processor_bound_count)(processor))
4248 break;
4249 }
4250
39236c6e
A
4251#if CONFIG_SCHED_IDLE_IN_PLACE
4252 if (thread != THREAD_NULL) {
 4253			/* Did the idle-in-place thread wake up? */
4254 if ((thread->state & (TH_WAIT|TH_SUSP)) != TH_WAIT || thread->wake_active)
4255 break;
4256 }
4257#endif
4258
6d2010ae
A
4259 IDLE_KERNEL_DEBUG_CONSTANT(
4260 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0);
4261
4b17d6b6
A
4262 machine_track_platform_idle(TRUE);
4263
2d21ac55 4264 machine_idle();
55e303ae 4265
4b17d6b6
A
4266 machine_track_platform_idle(FALSE);
4267
55e303ae 4268 (void)splsched();
c910b4d9 4269
6d2010ae
A
4270 IDLE_KERNEL_DEBUG_CONSTANT(
4271 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -2, 0);
4272
fe8ab488
A
4273 if (!SCHED(processor_queue_empty)(processor)) {
4274 /* Secondary SMT processors respond to directed wakeups
4275 * exclusively. Some platforms induce 'spurious' SMT wakeups.
4276 */
4277 if (processor->processor_primary == processor)
4278 break;
4279 }
55e303ae
A
4280 }
4281
2d21ac55
A
4282 timer_switch(&PROCESSOR_DATA(processor, idle_state),
4283 mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
4284 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
1c79356b 4285
2d21ac55
A
4286 pset_lock(pset);
4287
39236c6e 4288 /* If we were sent a remote AST and came out of idle, acknowledge it here with pset lock held */
3e170ce0
A
4289 pset->pending_AST_cpu_mask &= ~(1ULL << processor->cpu_id);
4290#if defined(CONFIG_SCHED_DEFERRED_AST)
4291 pset->pending_deferred_AST_cpu_mask &= ~(1ULL << processor->cpu_id);
4292#endif
39236c6e 4293
55e303ae
A
4294 state = processor->state;
4295 if (state == PROCESSOR_DISPATCHING) {
1c79356b 4296 /*
55e303ae 4297 * Commmon case -- cpu dispatched.
1c79356b 4298 */
2d21ac55
A
4299 new_thread = processor->next_thread;
4300 processor->next_thread = THREAD_NULL;
55e303ae 4301 processor->state = PROCESSOR_RUNNING;
1c79356b 4302
39236c6e 4303 if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE) ||
3e170ce0 4304 (rt_runq.count > 0)) ) {
fe8ab488 4305 /* Something higher priority has popped up on the runqueue - redispatch this thread elsewhere */
39236c6e
A
4306 processor->current_pri = IDLEPRI;
4307 processor->current_thmode = TH_MODE_FIXED;
fe8ab488 4308 processor->current_sfi_class = SFI_CLASS_KERNEL;
2d21ac55 4309 processor->deadline = UINT64_MAX;
55e303ae 4310
2d21ac55 4311 pset_unlock(pset);
1c79356b 4312
2d21ac55 4313 thread_lock(new_thread);
6d2010ae 4314 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REDISPATCH), (uintptr_t)thread_tid(new_thread), new_thread->sched_pri, rt_runq.count, 0, 0);
2d21ac55
A
4315 thread_setrun(new_thread, SCHED_HEADQ);
4316 thread_unlock(new_thread);
55e303ae 4317
316670eb
A
4318 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4319 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4320 (uintptr_t)thread_tid(thread), state, 0, 0, 0);
6d2010ae 4321
2d21ac55 4322 return (THREAD_NULL);
1c79356b 4323 }
1c79356b 4324
2d21ac55
A
4325 pset_unlock(pset);
4326
316670eb
A
4327 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4328 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4329 (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
39037602 4330
2d21ac55 4331 return (new_thread);
39037602
A
4332
4333 } else if (state == PROCESSOR_IDLE) {
4334 re_queue_tail(&pset->active_queue, &processor->processor_queue);
1c79356b 4335
2d21ac55 4336 processor->state = PROCESSOR_RUNNING;
39236c6e
A
4337 processor->current_pri = IDLEPRI;
4338 processor->current_thmode = TH_MODE_FIXED;
fe8ab488 4339 processor->current_sfi_class = SFI_CLASS_KERNEL;
39236c6e 4340 processor->deadline = UINT64_MAX;
39037602
A
4341
4342 } else if (state == PROCESSOR_SHUTDOWN) {
55e303ae
A
4343 /*
4344 * Going off-line. Force a
4345 * reschedule.
4346 */
2d21ac55
A
4347 if ((new_thread = processor->next_thread) != THREAD_NULL) {
4348 processor->next_thread = THREAD_NULL;
39236c6e
A
4349 processor->current_pri = IDLEPRI;
4350 processor->current_thmode = TH_MODE_FIXED;
fe8ab488 4351 processor->current_sfi_class = SFI_CLASS_KERNEL;
55e303ae 4352 processor->deadline = UINT64_MAX;
2d21ac55
A
4353
4354 pset_unlock(pset);
55e303ae
A
4355
4356 thread_lock(new_thread);
4357 thread_setrun(new_thread, SCHED_HEADQ);
4358 thread_unlock(new_thread);
55e303ae 4359
316670eb
A
4360 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4361 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4362 (uintptr_t)thread_tid(thread), state, 0, 0, 0);
6d2010ae 4363
2d21ac55
A
4364 return (THREAD_NULL);
4365 }
55e303ae
A
4366 }
4367
2d21ac55
A
4368 pset_unlock(pset);
4369
316670eb
A
4370 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4371 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4372 (uintptr_t)thread_tid(thread), state, 0, 0, 0);
6d2010ae 4373
2d21ac55
A
4374 return (THREAD_NULL);
4375}
4376
cf7d32b8
A
4377/*
4378 * Each processor has a dedicated thread which
4379 * executes the idle loop when there is no suitable
4380 * previous context.
4381 */
2d21ac55
A
4382void
4383idle_thread(void)
4384{
4385 processor_t processor = current_processor();
4386 thread_t new_thread;
4387
4388 new_thread = processor_idle(THREAD_NULL, processor);
4389 if (new_thread != THREAD_NULL) {
4390 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
4391 /*NOTREACHED*/
4392 }
55e303ae 4393
2d21ac55 4394 thread_block((thread_continue_t)idle_thread);
55e303ae 4395 /*NOTREACHED*/
1c79356b
A
4396}
4397
91447636
A
4398kern_return_t
4399idle_thread_create(
4400 processor_t processor)
1c79356b 4401{
91447636
A
4402 kern_return_t result;
4403 thread_t thread;
4404 spl_t s;
4405
4406 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
4407 if (result != KERN_SUCCESS)
4408 return (result);
4409
4410 s = splsched();
4411 thread_lock(thread);
4412 thread->bound_processor = processor;
4413 processor->idle_thread = thread;
3e170ce0 4414 thread->sched_pri = thread->base_pri = IDLEPRI;
91447636 4415 thread->state = (TH_RUN | TH_IDLE);
39236c6e 4416 thread->options |= TH_OPT_IDLE_THREAD;
91447636
A
4417 thread_unlock(thread);
4418 splx(s);
4419
4420 thread_deallocate(thread);
4421
4422 return (KERN_SUCCESS);
1c79356b
A
4423}
4424
91447636
A
4425/*
4426 * sched_startup:
4427 *
4428 * Kicks off scheduler services.
4429 *
4430 * Called at splsched.
4431 */
0b4e3aa0 4432void
91447636 4433sched_startup(void)
0b4e3aa0 4434{
91447636
A
4435 kern_return_t result;
4436 thread_t thread;
4437
3e170ce0
A
4438 simple_lock_init(&sched_vm_group_list_lock, 0);
4439
490019cf 4440
6d2010ae 4441 result = kernel_thread_start_priority((thread_continue_t)sched_init_thread,
39236c6e 4442 (void *)SCHED(maintenance_continuation), MAXPRI_KERNEL, &thread);
91447636
A
4443 if (result != KERN_SUCCESS)
4444 panic("sched_startup");
4445
4446 thread_deallocate(thread);
4447
39037602
A
4448 assert_thread_magic(thread);
4449
91447636 4450 /*
316670eb
A
4451 * Yield to the sched_init_thread once, to
4452 * initialize our own thread after being switched
4453 * back to.
91447636
A
4454 *
4455 * The current thread is the only other thread
4456 * active at this point.
4457 */
316670eb 4458 thread_block(THREAD_CONTINUE_NULL);
6d2010ae 4459}
91447636 4460
fe8ab488 4461#if defined(CONFIG_SCHED_TIMESHARE_CORE)
91447636 4462
39236c6e
A
4463static volatile uint64_t sched_maintenance_deadline;
4464static uint64_t sched_tick_last_abstime;
4465static uint64_t sched_tick_delta;
4466uint64_t sched_tick_max_delta;
1c79356b 4467/*
6d2010ae 4468 * sched_init_thread:
1c79356b 4469 *
55e303ae
A
4470 * Perform periodic bookkeeping functions about ten
4471 * times per second.
1c79356b 4472 */
fe8ab488 4473void
3e170ce0 4474sched_timeshare_maintenance_continue(void)
1c79356b 4475{
fe8ab488
A
4476 uint64_t sched_tick_ctime, late_time;
4477
3e170ce0
A
4478 struct sched_update_scan_context scan_context = {
4479 .earliest_bg_make_runnable_time = UINT64_MAX,
4480 .earliest_normal_make_runnable_time = UINT64_MAX,
4481 .earliest_rt_make_runnable_time = UINT64_MAX
4482 };
4483
fe8ab488 4484 sched_tick_ctime = mach_absolute_time();
1c79356b 4485
39236c6e
A
4486 if (__improbable(sched_tick_last_abstime == 0)) {
4487 sched_tick_last_abstime = sched_tick_ctime;
fe8ab488 4488 late_time = 0;
39236c6e
A
4489 sched_tick_delta = 1;
4490 } else {
fe8ab488
A
4491 late_time = sched_tick_ctime - sched_tick_last_abstime;
4492 sched_tick_delta = late_time / sched_tick_interval;
39236c6e
A
 4493		/* Ensure a delta of at least 1, since the interval could be slightly
4494 * smaller than the sched_tick_interval due to dispatch
4495 * latencies.
4496 */
4497 sched_tick_delta = MAX(sched_tick_delta, 1);
4498
 4499		/* In the event that interrupt latencies or platform
4500 * idle events that advanced the timebase resulted
4501 * in periods where no threads were dispatched,
4502 * cap the maximum "tick delta" at SCHED_TICK_MAX_DELTA
4503 * iterations.
4504 */
4505 sched_tick_delta = MIN(sched_tick_delta, SCHED_TICK_MAX_DELTA);
4506
4507 sched_tick_last_abstime = sched_tick_ctime;
4508 sched_tick_max_delta = MAX(sched_tick_delta, sched_tick_max_delta);
4509 }
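	/*
	 * In the common (non-first) case above this amounts to:
	 * sched_tick_delta = clamp(late_time / sched_tick_interval,
	 *                          1, SCHED_TICK_MAX_DELTA)
	 */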
4510
fe8ab488 4511 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_START,
39037602 4512 sched_tick_delta, late_time, 0, 0, 0);
fe8ab488 4513
39236c6e
A
 4514	/* Add a number of pseudo-ticks corresponding to the elapsed interval.
 4515	 * This could be greater than 1 if substantial intervals occur in which
 4516	 * all processors are idle, which rarely happens in practice.
4517 */
39037602 4518
39236c6e 4519 sched_tick += sched_tick_delta;
1c79356b
A
4520
4521 /*
91447636 4522 * Compute various averages.
1c79356b 4523 */
39236c6e 4524 compute_averages(sched_tick_delta);
1c79356b
A
4525
4526 /*
91447636 4527 * Scan the run queues for threads which
39037602
A
4528 * may need to be updated, and find the earliest runnable thread on the runqueue
4529 * to report its latency.
1c79356b 4530 */
3e170ce0
A
4531 SCHED(thread_update_scan)(&scan_context);
4532
4533 rt_runq_scan(&scan_context);
4534
4535 uint64_t ctime = mach_absolute_time();
4536
39037602
A
4537 uint64_t bg_max_latency = (ctime > scan_context.earliest_bg_make_runnable_time) ?
4538 ctime - scan_context.earliest_bg_make_runnable_time : 0;
4539
4540 uint64_t default_max_latency = (ctime > scan_context.earliest_normal_make_runnable_time) ?
4541 ctime - scan_context.earliest_normal_make_runnable_time : 0;
4542
4543 uint64_t realtime_max_latency = (ctime > scan_context.earliest_rt_make_runnable_time) ?
4544 ctime - scan_context.earliest_rt_make_runnable_time : 0;
4545
4546 machine_max_runnable_latency(bg_max_latency, default_max_latency, realtime_max_latency);
3e170ce0
A
4547
4548 /*
4549 * Check to see if the special sched VM group needs attention.
4550 */
4551 sched_vm_group_maintenance();
fe8ab488 4552
490019cf 4553
39037602
A
4554 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE) | DBG_FUNC_END,
4555 sched_pri_shifts[TH_BUCKET_SHARE_FG], sched_pri_shifts[TH_BUCKET_SHARE_BG],
4556 sched_pri_shifts[TH_BUCKET_SHARE_UT], 0, 0);
1c79356b 4557
3e170ce0
A
4558 assert_wait((event_t)sched_timeshare_maintenance_continue, THREAD_UNINT);
4559 thread_block((thread_continue_t)sched_timeshare_maintenance_continue);
1c79356b
A
4560 /*NOTREACHED*/
4561}
4562
39236c6e
A
4563static uint64_t sched_maintenance_wakeups;
4564
4565/*
4566 * Determine if the set of routines formerly driven by a maintenance timer
4567 * must be invoked, based on a deadline comparison. Signals the scheduler
4568 * maintenance thread on deadline expiration. Must be invoked at an interval
4569 * lower than the "sched_tick_interval", currently accomplished by
4570 * invocation via the quantum expiration timer and at context switch time.
4571 * Performance matters: this routine reuses a timestamp approximating the
4572 * current absolute time received from the caller, and should perform
4573 * no more than a comparison against the deadline in the common case.
4574 */
4575void
3e170ce0 4576sched_timeshare_consider_maintenance(uint64_t ctime) {
39236c6e
A
4577 uint64_t ndeadline, deadline = sched_maintenance_deadline;
4578
4579 if (__improbable(ctime >= deadline)) {
4580 if (__improbable(current_thread() == sched_maintenance_thread))
4581 return;
4582 OSMemoryBarrier();
4583
4584 ndeadline = ctime + sched_tick_interval;
4585
4586 if (__probable(__sync_bool_compare_and_swap(&sched_maintenance_deadline, deadline, ndeadline))) {
3e170ce0 4587 thread_wakeup((event_t)sched_timeshare_maintenance_continue);
39236c6e
A
4588 sched_maintenance_wakeups++;
4589 }
4590 }
4591}
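/*
 * The compare-and-swap on sched_maintenance_deadline above ensures that,
 * per sched_tick_interval, only one caller advances the deadline and issues
 * the wakeup, so concurrent callers do not wake the maintenance thread
 * redundantly.
 */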
4592
fe8ab488 4593#endif /* CONFIG_SCHED_TIMESHARE_CORE */
6d2010ae 4594
1c79356b 4595void
6d2010ae 4596sched_init_thread(void (*continuation)(void))
1c79356b 4597{
316670eb 4598 thread_block(THREAD_CONTINUE_NULL);
91447636 4599
490019cf
A
4600 thread_t thread = current_thread();
4601
39037602
A
4602 thread_set_thread_name(thread, "sched_maintenance_thread");
4603
490019cf
A
4604 sched_maintenance_thread = thread;
4605
6d2010ae 4606 continuation();
1c79356b 4607
1c79356b
A
4608 /*NOTREACHED*/
4609}
4610
fe8ab488 4611#if defined(CONFIG_SCHED_TIMESHARE_CORE)
6d2010ae 4612
1c79356b 4613/*
91447636 4614 * thread_update_scan / runq_scan:
55e303ae 4615 *
91447636
A
4616 * Scan the run queues to account for timesharing threads
4617 * which need to be updated.
1c79356b
A
4618 *
4619 * Scanner runs in two passes. Pass one squirrels likely
91447636 4620 * threads away in an array, pass two does the update.
1c79356b 4621 *
91447636
A
4622 * This is necessary because the run queue is locked for
4623 * the candidate scan, but the thread is locked for the update.
1c79356b 4624 *
91447636
A
4625 * Array should be sized to make forward progress, without
4626 * disabling preemption for long periods.
1c79356b 4627 */
55e303ae 4628
91447636 4629#define THREAD_UPDATE_SIZE 128
55e303ae 4630
39037602
A
4631static thread_t thread_update_array[THREAD_UPDATE_SIZE];
4632static uint32_t thread_update_count = 0;
1c79356b 4633
fe8ab488
A
4634/* Returns TRUE if thread was added, FALSE if thread_update_array is full */
4635boolean_t
4636thread_update_add_thread(thread_t thread)
4637{
4638 if (thread_update_count == THREAD_UPDATE_SIZE)
4639 return (FALSE);
4640
4641 thread_update_array[thread_update_count++] = thread;
4642 thread_reference_internal(thread);
4643 return (TRUE);
4644}
4645
4646void
4647thread_update_process_threads(void)
4648{
39037602 4649 assert(thread_update_count <= THREAD_UPDATE_SIZE);
fe8ab488 4650
39037602
A
4651 for (uint32_t i = 0 ; i < thread_update_count ; i++) {
4652 thread_t thread = thread_update_array[i];
4653 assert_thread_magic(thread);
4654 thread_update_array[i] = THREAD_NULL;
4655
4656 spl_t s = splsched();
fe8ab488 4657 thread_lock(thread);
39037602 4658 if (!(thread->state & (TH_WAIT)) && thread->sched_stamp != sched_tick) {
fe8ab488
A
4659 SCHED(update_priority)(thread);
4660 }
4661 thread_unlock(thread);
4662 splx(s);
4663
4664 thread_deallocate(thread);
4665 }
39037602
A
4666
4667 thread_update_count = 0;
fe8ab488
A
4668}
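/*
 * Pass one (runq_scan(), below) stashes up to THREAD_UPDATE_SIZE referenced
 * threads in thread_update_array while the run queue is locked; pass two
 * (thread_update_process_threads(), above) takes each thread's lock,
 * refreshes its priority if it is not waiting and its sched_stamp is stale,
 * then drops the reference.
 */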
4669
1c79356b 4670/*
91447636
A
4671 * Scan a runq for candidate threads.
4672 *
4673 * Returns TRUE if retry is needed.
1c79356b 4674 */
fe8ab488 4675boolean_t
91447636 4676runq_scan(
39037602
A
4677 run_queue_t runq,
4678 sched_update_scan_context_t scan_context)
1c79356b 4679{
39037602
A
4680 int count = runq->count;
4681 int queue_index;
1c79356b 4682
39037602
A
4683 assert(count >= 0);
4684
4685 if (count == 0)
4686 return FALSE;
4687
4688 for (queue_index = bitmap_first(runq->bitmap, NRQS);
4689 queue_index >= 0;
4690 queue_index = bitmap_next(runq->bitmap, queue_index)) {
4691
4692 thread_t thread;
4693 queue_t queue = &runq->queues[queue_index];
3e170ce0 4694
39037602
A
4695 qe_foreach_element(thread, queue, runq_links) {
4696 assert(count > 0);
4697 assert_thread_magic(thread);
4698
4699 if (thread->sched_stamp != sched_tick &&
4700 thread->sched_mode == TH_MODE_TIMESHARE) {
4701 if (thread_update_add_thread(thread) == FALSE)
4702 return TRUE;
1c79356b
A
4703 }
4704
39037602
A
4705 if (cpu_throttle_enabled && ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->base_pri <= MAXPRI_THROTTLE))) {
4706 if (thread->last_made_runnable_time < scan_context->earliest_bg_make_runnable_time) {
4707 scan_context->earliest_bg_make_runnable_time = thread->last_made_runnable_time;
4708 }
4709 } else {
4710 if (thread->last_made_runnable_time < scan_context->earliest_normal_make_runnable_time) {
4711 scan_context->earliest_normal_make_runnable_time = thread->last_made_runnable_time;
4712 }
4713 }
4714 count--;
1c79356b
A
4715 }
4716 }
1c79356b 4717
39037602 4718 return FALSE;
1c79356b
A
4719}
4720
fe8ab488
A
4721#endif /* CONFIG_SCHED_TIMESHARE_CORE */
4722
6d2010ae
A
4723boolean_t
4724thread_eager_preemption(thread_t thread)
4725{
4726 return ((thread->sched_flags & TH_SFLAG_EAGERPREEMPT) != 0);
4727}
4728
4729void
4730thread_set_eager_preempt(thread_t thread)
4731{
4732 spl_t x;
4733 processor_t p;
4734 ast_t ast = AST_NONE;
4735
4736 x = splsched();
4737 p = current_processor();
4738
4739 thread_lock(thread);
4740 thread->sched_flags |= TH_SFLAG_EAGERPREEMPT;
4741
4742 if (thread == current_thread()) {
6d2010ae 4743
fe8ab488
A
4744 ast = csw_check(p, AST_NONE);
4745 thread_unlock(thread);
6d2010ae
A
4746 if (ast != AST_NONE) {
4747 (void) thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
4748 }
4749 } else {
4750 p = thread->last_processor;
4751
4752 if (p != PROCESSOR_NULL && p->state == PROCESSOR_RUNNING &&
4753 p->active_thread == thread) {
4754 cause_ast_check(p);
4755 }
1c79356b 4756
6d2010ae
A
4757 thread_unlock(thread);
4758 }
4759
4760 splx(x);
4761}
4762
4763void
4764thread_clear_eager_preempt(thread_t thread)
4765{
4766 spl_t x;
4767
4768 x = splsched();
4769 thread_lock(thread);
4770
4771 thread->sched_flags &= ~TH_SFLAG_EAGERPREEMPT;
4772
4773 thread_unlock(thread);
4774 splx(x);
4775}
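/*
 * TH_SFLAG_EAGERPREEMPT is consumed via thread_eager_preemption(): in
 * processor_setrun() it upgrades a preemption to AST_URGENT when the
 * running thread has it set, and in csw_check_locked() it makes a
 * run-queue-driven preemption urgent.
 */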
3e170ce0 4776
6d2010ae
A
4777/*
4778 * Scheduling statistics
4779 */
4780void
4781sched_stats_handle_csw(processor_t processor, int reasons, int selfpri, int otherpri)
4782{
4783 struct processor_sched_statistics *stats;
4784 boolean_t to_realtime = FALSE;
4785
4786 stats = &processor->processor_data.sched_stats;
4787 stats->csw_count++;
4788
4789 if (otherpri >= BASEPRI_REALTIME) {
4790 stats->rt_sched_count++;
4791 to_realtime = TRUE;
4792 }
4793
4794 if ((reasons & AST_PREEMPT) != 0) {
4795 stats->preempt_count++;
4796
4797 if (selfpri >= BASEPRI_REALTIME) {
4798 stats->preempted_rt_count++;
4799 }
4800
4801 if (to_realtime) {
4802 stats->preempted_by_rt_count++;
4803 }
4804
4805 }
4806}
4807
4808void
4809sched_stats_handle_runq_change(struct runq_stats *stats, int old_count)
4810{
4811 uint64_t timestamp = mach_absolute_time();
4812
4813 stats->count_sum += (timestamp - stats->last_change_timestamp) * old_count;
4814 stats->last_change_timestamp = timestamp;
4815}
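/*
 * count_sum above integrates run-queue depth over time: each change adds
 * old_count weighted by the time since the previous change, so dividing
 * count_sum by the elapsed time yields, in effect, the average queue depth.
 */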
4816
1c79356b 4817/*
6d2010ae 4818 * For calls from assembly code
1c79356b 4819 */
6d2010ae 4820#undef thread_wakeup
1c79356b
A
4821void
4822thread_wakeup(
6d2010ae 4823 event_t x);
1c79356b
A
4824
4825void
4826thread_wakeup(
6d2010ae 4827 event_t x)
1c79356b 4828{
6d2010ae 4829 thread_wakeup_with_result(x, THREAD_AWAKENED);
1c79356b
A
4830}
4831
91447636
A
4832boolean_t
4833preemption_enabled(void)
4834{
4835 return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
4836}
9bccf70c 4837
4b17d6b6
A
4838static void
4839sched_timer_deadline_tracking_init(void) {
4840 nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1);
4841 nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2);
4842}
3e170ce0
A
4843
4844
4845kern_return_t
4846sched_work_interval_notify(thread_t thread, uint64_t work_interval_id, uint64_t start, uint64_t finish, uint64_t deadline, uint64_t next_start, uint32_t flags)
4847{
4848 int urgency;
4849 uint64_t urgency_param1, urgency_param2;
4850 spl_t s;
4851
4852 if (work_interval_id == 0) {
4853 return (KERN_INVALID_ARGUMENT);
4854 }
4855
4856 assert(thread == current_thread());
4857
4858 thread_mtx_lock(thread);
4859 if (thread->work_interval_id != work_interval_id) {
4860 thread_mtx_unlock(thread);
4861 return (KERN_INVALID_ARGUMENT);
4862 }
4863 thread_mtx_unlock(thread);
4864
4865 s = splsched();
4866 thread_lock(thread);
4867 urgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2);
4868 thread_unlock(thread);
4869 splx(s);
4870
4871 machine_work_interval_notify(thread, work_interval_id, start, finish, deadline, next_start, urgency, flags);
4872 return (KERN_SUCCESS);
4873}
4874
4875void thread_set_options(uint32_t thopt) {
4876 spl_t x;
4877 thread_t t = current_thread();
4878
4879 x = splsched();
4880 thread_lock(t);
4881
4882 t->options |= thopt;
4883
4884 thread_unlock(t);
4885 splx(x);
4886}
813fb2f6
A
4887
4888void thread_set_pending_block_hint(thread_t thread, block_hint_t block_hint) {
4889 thread->pending_block_hint = block_hint;
4890}